8 Commits

Author SHA1 Message Date
6ce6153ccf 1.4.2
Some checks failed
Default (tags) / security (push) Failing after 17s
Default (tags) / test (push) Failing after 11s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-07 05:06:17 +01:00
ec2d4f9fbc fix(core): Fix concurrency and download handling in HandelsRegister class and adjust test cases 2025-01-07 05:06:16 +01:00
a19be31381 1.4.1
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-04 13:40:50 +01:00
9c3f012da7 fix(core): Fix issues with JSONL data processing and improve error handling in business record validation 2025-01-04 13:40:50 +01:00
8ebbc16bcd 1.4.0
Some checks failed
Default (tags) / security (push) Failing after 16s
Default (tags) / test (push) Failing after 11s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-04 02:27:53 +01:00
c177193438 feat(HandelsRegister): Add file download functionality to HandelsRegister 2025-01-04 02:27:53 +01:00
7c07bc59e4 1.3.1
Some checks failed
Default (tags) / security (push) Failing after 15s
Default (tags) / test (push) Failing after 12s
Default (tags) / release (push) Has been skipped
Default (tags) / metadata (push) Has been skipped
2025-01-03 02:19:07 +01:00
e4a8d371f7 fix(HandelsRegister): Refined HandelsRegister functionality for better error handling and response capture. 2025-01-03 02:19:07 +01:00
13 changed files with 552 additions and 255 deletions

View File

@ -1,5 +1,36 @@
# Changelog
## 2025-01-07 - 1.4.2 - fix(core)
Fix concurrency and download handling in HandelsRegister class and adjust test cases
- Improved the clickFindButton function to include an argument for results limit.
- Enhanced the downloadFile function to rename and ensure files are correctly handled.
- Updated searchCompany method to allow specifying a limit on the number of search results.
- Adjusted test cases to select specific test data indices and output test files to a dedicated directory.
## 2025-01-04 - 1.4.1 - fix(core)
Fix issues with JSONL data processing and improve error handling in business record validation
- Fixed JSONL data processing by adding concurrent processing for each JSON line to enhance performance.
- Added validation logic in BusinessRecord class to ensure that the mandatory fields are checked.
- Adjusted environment variable loading in OpenData class to ensure correct database initialization.
- Included missing dependencies and exports in the project files to ensure proper functionality.
## 2025-01-04 - 1.4.0 - feat(HandelsRegister)
Add file download functionality to HandelsRegister
- Implemented file download feature in the HandelsRegister class.
- Configured pages in Puppeteer to allow downloads and set download paths.
- Parsed German registration information with more robust error handling.
- Added specific methods for downloading and handling 'SI' and 'AD' files.
## 2025-01-03 - 1.3.1 - fix(HandelsRegister)
Refined HandelsRegister functionality for better error handling and response capture.
- Improved parsing logic in parseGermanRegistration function.
- Enhanced navigateToPage and clickFindButton methods with error messages for clarity.
- Implemented a new responseListener to handle and log HTTP responses correctly.
## 2025-01-03 - 1.3.0 - feat(core)
Enhanced data handling capabilities and improved company search functionalities.

View File

@ -30,5 +30,8 @@
"npmci": {
"npmGlobalTools": [],
"npmAccessLevel": "public"
},
"tsdoc": {
"legal": "\n## License and Legal Information\n\nThis repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository. \n\n**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.\n\n### Trademarks\n\nThis project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.\n\n### Company Information\n\nTask Venture Capital GmbH \nRegistered at District court Bremen HRB 35230 HB, Germany\n\nFor any legal inquiries or if you require further information, please contact us via email at hello@task.vc.\n\nBy using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.\n"
}
}

View File

@ -1,6 +1,6 @@
{
"name": "@fin.cx/opendata",
"version": "1.3.0",
"version": "1.4.2",
"private": false,
"description": "A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.",
"main": "dist_ts/index.js",
@ -19,19 +19,23 @@
"@git.zone/tsrun": "^1.3.3",
"@git.zone/tstest": "^1.0.90",
"@push.rocks/tapbundle": "^5.5.4",
"@types/node": "^22.10.4"
"@types/node": "^22.10.5"
},
"dependencies": {
"@push.rocks/lik": "^6.1.0",
"@push.rocks/qenv": "^6.1.0",
"@push.rocks/smartarchive": "^4.0.39",
"@push.rocks/smartarray": "^1.1.0",
"@push.rocks/smartbrowser": "^2.0.8",
"@push.rocks/smartdata": "^5.2.10",
"@push.rocks/smartdelay": "^3.0.5",
"@push.rocks/smartfile": "^11.0.23",
"@push.rocks/smartfile": "^11.1.5",
"@push.rocks/smartpath": "^5.0.18",
"@push.rocks/smartpromise": "^4.0.4",
"@push.rocks/smartpromise": "^4.1.0",
"@push.rocks/smartrequest": "^2.0.23",
"@push.rocks/smartstream": "^3.2.5",
"@push.rocks/smartunique": "^3.0.9",
"@push.rocks/smartxml": "^1.1.1",
"@tsclass/tsclass": "^4.2.0"
},
"repository": {

217
pnpm-lock.yaml generated
View File

@ -8,12 +8,18 @@ importers:
.:
dependencies:
'@push.rocks/lik':
specifier: ^6.1.0
version: 6.1.0
'@push.rocks/qenv':
specifier: ^6.1.0
version: 6.1.0
'@push.rocks/smartarchive':
specifier: ^4.0.39
version: 4.0.39
'@push.rocks/smartarray':
specifier: ^1.1.0
version: 1.1.0
'@push.rocks/smartbrowser':
specifier: ^2.0.8
version: 2.0.8
@ -24,20 +30,26 @@ importers:
specifier: ^3.0.5
version: 3.0.5
'@push.rocks/smartfile':
specifier: ^11.0.23
version: 11.0.23
specifier: ^11.1.5
version: 11.1.5
'@push.rocks/smartpath':
specifier: ^5.0.18
version: 5.0.18
'@push.rocks/smartpromise':
specifier: ^4.0.4
version: 4.0.4
specifier: ^4.1.0
version: 4.1.0
'@push.rocks/smartrequest':
specifier: ^2.0.23
version: 2.0.23
'@push.rocks/smartstream':
specifier: ^3.2.5
version: 3.2.5
'@push.rocks/smartunique':
specifier: ^3.0.9
version: 3.0.9
'@push.rocks/smartxml':
specifier: ^1.1.1
version: 1.1.1
'@tsclass/tsclass':
specifier: ^4.2.0
version: 4.2.0
@ -58,8 +70,8 @@ importers:
specifier: ^5.5.4
version: 5.5.4(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3)
'@types/node':
specifier: ^22.10.4
version: 22.10.4
specifier: ^22.10.5
version: 22.10.5
packages:
@ -741,6 +753,9 @@ packages:
'@push.rocks/smartarchive@4.0.39':
resolution: {integrity: sha512-e8xOOa7h4WlZMhjEd7IjAL/wgLBS3yJ6+Q7eZognHg1cNE/TOZ1kYrAN9eo8xmTtd+37hY9NXayk2JwXdXEvyA==}
'@push.rocks/smartarray@1.1.0':
resolution: {integrity: sha512-b5YgBmUdglOJH8zeUf2ZWdPCoqySgwvkycRi2BhA9zVZHkpASh39Ej0q0fxFJetlUVyYqGfVoMVjbVrLFfFV7g==}
'@push.rocks/smartbrowser@2.0.8':
resolution: {integrity: sha512-0KWRZj3TuKo/sNwgPbiSE6WL+TMeR19t1JmXBZWh9n8iA2mpc4HhMrQAndEUdRCkx5ofSaHWojIRVFzGChj0Dg==}
@ -786,8 +801,8 @@ packages:
'@push.rocks/smartfile@10.0.41':
resolution: {integrity: sha512-xOOy0duI34M2qrJZggpk51EHGXmg9+mBL1Q55tNiQKXzfx89P3coY1EAZG8tvmep3qB712QEKe7T+u04t42Kjg==}
'@push.rocks/smartfile@11.0.23':
resolution: {integrity: sha512-+tP/ydhNapve/bVDGMoloZmB6eBFvamHVx5uP05GLCKh0KFffNJuMKTHX4DgC0y3BmNm+Bii7jhpQblY+R99kQ==}
'@push.rocks/smartfile@11.1.5':
resolution: {integrity: sha512-N+f7P4MqgkDiPR5CrvzMJF9JVtXQK1Di24cYAd8roFkDVpnILlLQOQql558FPm2QE+HtDLuPLPBu2uaSZRLReA==}
'@push.rocks/smartguard@3.1.0':
resolution: {integrity: sha512-J23q84f1O+TwFGmd4lrO9XLHUh2DaLXo9PN/9VmTWYzTkQDv5JehmifXVI0esophXcCIfbdIu6hbt7/aHlDF4A==}
@ -849,8 +864,8 @@ packages:
'@push.rocks/smartpdf@3.1.8':
resolution: {integrity: sha512-9fxshJAp6VCkrAFWXAFS7X7QzZLFSWM/JzDtllYW7gaWzRKxsMCdfaNy1vKsGq5uK5L91Lrd+A9Olp1mx4xs1w==}
'@push.rocks/smartpromise@4.0.4':
resolution: {integrity: sha512-Mbh+DnX4+rVPEZgYU7LtTJI/AYoNn7+h27AycEFpPJW41DCfjTiXiI0+ecNdyO1AfbcL0Q02RQjoEauEWx5FQg==}
'@push.rocks/smartpromise@4.1.0':
resolution: {integrity: sha512-1E4QZx1bYFMEgbK1C9gb4CB3YRhfkvSeffc5CnT83n7NV4Qly/Sxe9G1Jn0sQBB5+sbFHwTlj/0al5+q4gXiDw==}
'@push.rocks/smartpuppeteer@2.0.2':
resolution: {integrity: sha512-EcYCT0PX++WjfHp7W5UYX3t8x5gSNpJMMUvhA7SHz8b2t76ItslNWxprRcF0CUQyN1fozbf5StZf7dwdGc/dIA==}
@ -1440,8 +1455,8 @@ packages:
'@types/node-forge@1.3.11':
resolution: {integrity: sha512-FQx220y22OKNTqaByeBGqHWYz4cl94tpcxeFdvBo3wjG6XPBuZ0BNgNZRV5J5TFmmcsJ4IzsLkmGRiQbnYsBEQ==}
'@types/node@22.10.4':
resolution: {integrity: sha512-99l6wv4HEzBQhvaU/UGoeBoCK61SCROQaCCGyQSgX2tEQ3rKkNZ2S7CEWnS/4s1LV+8ODdK21UeyR1fHP2mXug==}
'@types/node@22.10.5':
resolution: {integrity: sha512-F8Q+SeGimwOo86fiovQh8qiXfFEh2/ocYv7tU5pJ3EXMSSxk1Joj5wefpFK2fHTf/N6HKGSxIDBT9f3gCxXPkQ==}
'@types/parse5@6.0.3':
resolution: {integrity: sha512-SuT16Q1K51EAVPz1K29DJ/sXjhSQ0zjvsypYJ6tlwVsRV9jwW5Adq2ch8Dq8kDBCkYnELS7N7VNCSB5nC56t/g==}
@ -4169,7 +4184,7 @@ snapshots:
'@push.rocks/smartbuffer': 3.0.4
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartguard': 3.1.0
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/webrequest': 3.0.37
'@push.rocks/webstream': 1.0.10
@ -4185,7 +4200,7 @@ snapshots:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartenv': 5.0.12
'@push.rocks/smartfeed': 1.0.11
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartlog-destination-devtools': 1.0.12
@ -4196,7 +4211,7 @@ snapshots:
'@push.rocks/smartntml': 2.0.8
'@push.rocks/smartopen': 2.0.0
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smartsitemap': 2.0.3
@ -4862,7 +4877,7 @@ snapshots:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartmarkdown': 3.0.3
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrouter': 1.3.2
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smartstate': 2.0.19
@ -5042,10 +5057,10 @@ snapshots:
'@push.rocks/early': 4.0.4
'@push.rocks/smartcli': 4.0.11
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
typescript: 5.6.3
transitivePeerDependencies:
- aws-crt
@ -5055,11 +5070,11 @@ snapshots:
'@push.rocks/early': 4.0.4
'@push.rocks/smartcli': 4.0.11
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartlog-destination-local': 9.0.2
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartspawn': 3.0.3
'@types/html-minifier': 4.0.5
esbuild: 0.24.2
@ -5072,7 +5087,7 @@ snapshots:
dependencies:
'@push.rocks/smartcli': 4.0.11
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartnpm': 2.0.4
'@push.rocks/smartpath': 5.0.18
@ -5082,7 +5097,7 @@ snapshots:
'@git.zone/tsrun@1.3.3':
dependencies:
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartshell': 3.2.2
tsx: 4.19.2
@ -5094,9 +5109,9 @@ snapshots:
'@push.rocks/consolecolor': 2.0.2
'@push.rocks/smartbrowser': 2.0.8
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartshell': 3.2.2
'@push.rocks/tapbundle': 5.5.4(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3)
'@types/ws': 8.5.13
@ -5145,7 +5160,7 @@ snapshots:
'@jest/schemas': 29.6.3
'@types/istanbul-lib-coverage': 2.0.6
'@types/istanbul-reports': 3.0.4
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/yargs': 17.0.33
chalk: 4.1.2
@ -5267,7 +5282,7 @@ snapshots:
'@push.rocks/early@4.0.4':
dependencies:
'@push.rocks/consolecolor': 2.0.2
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/isohash@2.0.1':
dependencies:
@ -5283,10 +5298,10 @@ snapshots:
'@push.rocks/smartcache': 1.0.16
'@push.rocks/smartenv': 5.0.12
'@push.rocks/smartexit': 1.0.23
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartstring': 4.0.15
'@push.rocks/smartunique': 3.0.9
'@push.rocks/taskbuffer': 3.1.7
@ -5298,7 +5313,7 @@ snapshots:
dependencies:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartmatch': 2.0.0
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smarttime': 4.1.1
'@types/minimatch': 5.1.2
@ -5322,7 +5337,7 @@ snapshots:
dependencies:
'@api.global/typedrequest': 3.1.10
'@configvault.io/interfaces': 1.0.17
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartpath': 5.0.18
@ -5330,7 +5345,7 @@ snapshots:
dependencies:
'@push.rocks/smartfile': 10.0.41
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smartstream': 2.0.8
@ -5344,9 +5359,9 @@ snapshots:
'@push.rocks/smartarchive@4.0.39':
dependencies:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smartstream': 3.2.5
@ -5358,6 +5373,8 @@ snapshots:
tar-stream: 3.1.7
through: 2.3.8
'@push.rocks/smartarray@1.1.0': {}
'@push.rocks/smartbrowser@2.0.8':
dependencies:
'@push.rocks/smartdelay': 3.0.5
@ -5375,7 +5392,7 @@ snapshots:
'@aws-sdk/client-s3': 3.717.0
'@push.rocks/smartmime': 2.0.4
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smartstream': 3.2.5
'@push.rocks/smartstring': 4.0.15
@ -5399,7 +5416,7 @@ snapshots:
'@push.rocks/smartchok@1.0.34':
dependencies:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@tempfix/watcher': 2.3.0
@ -5408,13 +5425,13 @@ snapshots:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartobject': 1.0.12
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
yargs-parser: 21.1.1
'@push.rocks/smartcrypto@2.0.4':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@types/node-forge': 1.3.11
node-forge: 1.3.1
@ -5424,7 +5441,7 @@ snapshots:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartmongo': 2.0.10(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3)
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smartstring': 4.0.15
'@push.rocks/smarttime': 4.1.1
@ -5446,23 +5463,23 @@ snapshots:
'@push.rocks/smartdelay@3.0.5':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartenv@5.0.12':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartexit@1.0.23':
dependencies:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
tree-kill: 1.2.2
'@push.rocks/smartexpect@1.4.0':
dependencies:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
fast-deep-equal: 3.1.3
'@push.rocks/smartfeed@1.0.11':
@ -5482,7 +5499,7 @@ snapshots:
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartmime': 1.0.6
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smartstream': 2.0.8
'@types/fs-extra': 11.0.4
@ -5492,7 +5509,7 @@ snapshots:
glob: 10.4.5
js-yaml: 4.1.0
'@push.rocks/smartfile@11.0.23':
'@push.rocks/smartfile@11.1.5':
dependencies:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartdelay': 3.0.5
@ -5501,7 +5518,7 @@ snapshots:
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartmime': 2.0.4
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smartstream': 3.2.5
'@types/fs-extra': 11.0.4
@ -5513,13 +5530,13 @@ snapshots:
'@push.rocks/smartguard@3.1.0':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smarthash@3.0.4':
dependencies:
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@types/through2': 2.0.41
through2: 4.0.2
@ -5538,7 +5555,7 @@ snapshots:
dependencies:
'@push.rocks/consolecolor': 2.0.2
'@push.rocks/smartlog-interfaces': 3.0.2
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartlog-interfaces@3.0.2':
dependencies:
@ -5587,7 +5604,7 @@ snapshots:
'@push.rocks/mongodump': 1.0.8(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))
'@push.rocks/smartdata': 5.2.10(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3)
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
mongodb-memory-server: 8.16.1(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))
transitivePeerDependencies:
- '@aws-sdk/client-sso-oidc'
@ -5618,7 +5635,7 @@ snapshots:
'@push.rocks/smartarchive': 3.0.8
'@push.rocks/smartfile': 10.0.41
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smarttime': 4.1.1
'@push.rocks/smartversion': 3.0.5
@ -5630,7 +5647,7 @@ snapshots:
dependencies:
'@design.estate/dees-element': 2.0.39
'@happy-dom/global-registrator': 15.11.7
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
fake-indexeddb: 6.0.0
transitivePeerDependencies:
- supports-color
@ -5650,10 +5667,10 @@ snapshots:
dependencies:
'@push.rocks/smartbuffer': 3.0.4
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartnetwork': 3.0.2
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartpuppeteer': 2.0.2
'@push.rocks/smartunique': 3.0.9
'@tsclass/tsclass': 4.2.0
@ -5668,7 +5685,7 @@ snapshots:
- supports-color
- utf-8-validate
'@push.rocks/smartpromise@4.0.4': {}
'@push.rocks/smartpromise@4.1.0': {}
'@push.rocks/smartpuppeteer@2.0.2':
dependencies:
@ -5684,7 +5701,7 @@ snapshots:
'@push.rocks/smartrequest@2.0.23':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smarturl': 3.1.0
agentkeepalive: 4.6.0
form-data: 4.0.1
@ -5697,13 +5714,13 @@ snapshots:
'@push.rocks/smartrx@3.0.7':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
rxjs: 7.8.1
'@push.rocks/smarts3@2.2.5':
dependencies:
'@push.rocks/smartbucket': 3.3.7
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartpath': 5.0.18
'@tsclass/tsclass': 4.2.0
'@types/s3rver': 3.7.4
@ -5716,7 +5733,7 @@ snapshots:
dependencies:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartexit': 1.0.23
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@types/which': 3.0.4
tree-kill: 1.2.2
which: 5.0.0
@ -5741,7 +5758,7 @@ snapshots:
'@push.rocks/smartenv': 5.0.12
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smarttime': 4.1.1
engine.io: 6.5.4
@ -5754,7 +5771,7 @@ snapshots:
'@push.rocks/smartspawn@3.0.3':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
spawn-wrap: 2.0.0
threads: 1.7.0
tiny-worker: 2.3.0
@ -5766,13 +5783,13 @@ snapshots:
'@push.rocks/isohash': 2.0.1
'@push.rocks/lik': 6.1.0
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@push.rocks/webstore': 2.0.20
'@push.rocks/smartstream@2.0.8':
dependencies:
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@types/from2': 2.3.5
'@types/through2': 2.0.41
@ -5783,7 +5800,7 @@ snapshots:
dependencies:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartenv': 5.0.12
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smartstring@4.0.15':
@ -5801,7 +5818,7 @@ snapshots:
dependencies:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
croner: 9.0.0
date-fns: 4.1.0
dayjs: 1.11.13
@ -5840,11 +5857,11 @@ snapshots:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartenv': 5.0.12
'@push.rocks/smartexpect': 1.4.0
'@push.rocks/smartfile': 11.0.23
'@push.rocks/smartfile': 11.1.5
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartmongo': 2.0.10(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3)
'@push.rocks/smartpath': 5.0.18
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrequest': 2.0.23
'@push.rocks/smarts3': 2.2.5
'@push.rocks/smartshell': 3.2.2
@ -5869,7 +5886,7 @@ snapshots:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartlog': 3.0.7
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@push.rocks/smarttime': 4.1.1
'@push.rocks/smartunique': 3.0.9
@ -5879,7 +5896,7 @@ snapshots:
'@push.rocks/smartdelay': 3.0.5
'@push.rocks/smartenv': 5.0.12
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/webstore': 2.0.20
'@push.rocks/websetup@3.0.19':
@ -5894,7 +5911,7 @@ snapshots:
'@push.rocks/lik': 6.1.0
'@push.rocks/smartenv': 5.0.12
'@push.rocks/smartjson': 5.0.20
'@push.rocks/smartpromise': 4.0.4
'@push.rocks/smartpromise': 4.1.0
'@push.rocks/smartrx': 3.0.7
'@tempfix/idb': 8.0.3
fake-indexeddb: 5.0.2
@ -6456,14 +6473,14 @@ snapshots:
'@types/accepts@1.3.7':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/babel__code-frame@7.0.6': {}
'@types/body-parser@1.19.5':
dependencies:
'@types/connect': 3.4.38
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/buffer-json@2.0.3': {}
@ -6479,17 +6496,17 @@ snapshots:
'@types/clean-css@4.2.11':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
source-map: 0.6.1
'@types/co-body@6.1.3':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/qs': 6.9.17
'@types/connect@3.4.38':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/content-disposition@0.5.8': {}
@ -6502,11 +6519,11 @@ snapshots:
'@types/connect': 3.4.38
'@types/express': 5.0.0
'@types/keygrip': 1.0.6
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/cors@2.8.17':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/debounce@1.2.4': {}
@ -6520,14 +6537,14 @@ snapshots:
'@types/express-serve-static-core@4.19.6':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/qs': 6.9.17
'@types/range-parser': 1.2.7
'@types/send': 0.17.4
'@types/express-serve-static-core@5.0.3':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/qs': 6.9.17
'@types/range-parser': 1.2.7
'@types/send': 0.17.4
@ -6552,30 +6569,30 @@ snapshots:
'@types/from2@2.3.5':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/fs-extra@11.0.4':
dependencies:
'@types/jsonfile': 6.1.4
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/fs-extra@9.0.13':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/glob@7.2.0':
dependencies:
'@types/minimatch': 5.1.2
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/glob@8.1.0':
dependencies:
'@types/minimatch': 5.1.2
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/gunzip-maybe@1.4.2':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/hast@3.0.4':
dependencies:
@ -6609,7 +6626,7 @@ snapshots:
'@types/jsonfile@6.1.4':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/keygrip@1.0.6': {}
@ -6626,7 +6643,7 @@ snapshots:
'@types/http-errors': 2.0.4
'@types/keygrip': 1.0.6
'@types/koa-compose': 3.2.8
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/mdast@4.0.4':
dependencies:
@ -6644,9 +6661,9 @@ snapshots:
'@types/node-forge@1.3.11':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/node@22.10.4':
'@types/node@22.10.5':
dependencies:
undici-types: 6.20.0
@ -6664,19 +6681,19 @@ snapshots:
'@types/s3rver@3.7.4':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/semver@7.5.8': {}
'@types/send@0.17.4':
dependencies:
'@types/mime': 1.3.5
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/serve-static@1.15.7':
dependencies:
'@types/http-errors': 2.0.4
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/send': 0.17.4
'@types/sinon-chai@3.2.12':
@ -6696,15 +6713,15 @@ snapshots:
'@types/tar-stream@2.2.3':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/tar-stream@3.1.3':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/through2@2.0.41':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/triple-beam@1.3.5': {}
@ -6728,7 +6745,7 @@ snapshots:
'@types/whatwg-url@8.2.2':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/webidl-conversions': 7.0.3
'@types/which@2.0.2': {}
@ -6737,11 +6754,11 @@ snapshots:
'@types/ws@7.4.7':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/ws@8.5.13':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
'@types/yargs-parser@21.0.3': {}
@ -6751,7 +6768,7 @@ snapshots:
'@types/yauzl@2.10.3':
dependencies:
'@types/node': 22.10.4
'@types/node': 22.10.5
optional: true
'@ungap/structured-clone@1.2.1': {}
@ -7336,7 +7353,7 @@ snapshots:
dependencies:
'@types/cookie': 0.4.1
'@types/cors': 2.8.17
'@types/node': 22.10.4
'@types/node': 22.10.5
accepts: 1.3.8
base64id: 2.0.0
cookie: 0.4.2
@ -8044,7 +8061,7 @@ snapshots:
jest-util@29.7.0:
dependencies:
'@jest/types': 29.6.3
'@types/node': 22.10.4
'@types/node': 22.10.5
chalk: 4.1.2
ci-info: 3.9.0
graceful-fs: 4.2.11

View File

@ -215,4 +215,22 @@ When working with business data, ensuring integrity and accuracy is crucial. Eac
The `@fin.cx/opendata` module provides an extensive toolset for accessing and managing business data, particularly for companies based in Germany. Its functionalities include creating, updating, retrieving, and deleting business records, as well as keeping them current with the latest open data releases. This makes it an invaluable asset for developers aiming to integrate open data seamlessly into their systems, ensuring robust data management capabilities within their applications.
Happy exploring and integrating open data into your projects!
undefined
## License and Legal Information
This repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository.
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
### Trademarks
This project is owned and maintained by Task Venture Capital GmbH. The names and logos associated with Task Venture Capital GmbH and any related products or services are trademarks of Task Venture Capital GmbH and are not included within the scope of the MIT license granted herein. Use of these trademarks must comply with Task Venture Capital GmbH's Trademark Guidelines, and any usage must be approved in writing by Task Venture Capital GmbH.
### Company Information
Task Venture Capital GmbH
Registered at District court Bremen HRB 35230 HB, Germany
For any legal inquiries or if you require further information, please contact us via email at hello@task.vc.
By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.

View File

@ -1,6 +1,11 @@
import { expect, expectAsync, tap } from '@push.rocks/tapbundle';
import { tapNodeTools } from '@push.rocks/tapbundle/node';
import * as opendata from '../ts/index.js'
import { BusinessRecord } from '../ts/classes.businessrecord.js';
let testOpenDataInstance: opendata.OpenData;
tap.test('first test', async () => {
@ -17,17 +22,23 @@ tap.skip.test('should build initial data', async () => {
});
/**
 * Searches the Handelsregister for 'Volkswagen' and exposes the result list
 * to later tests through `resultsSearch.testResultPromise`.
 */
const resultsSearch = tap.test('should get the data for a company', async () => {
  // Single declaration only: the second argument caps the number of search results at 20.
  const result = await testOpenDataInstance.handelsregister.searchCompany('Volkswagen', 20);
  console.log(result);
  return result;
});
/**
 * Picks one entry from the previous search, fetches the specific company for
 * its parsed German registration and writes the downloaded files to
 * './.nogit/testoutput'.
 */
tap.test('should get the data for a specific company', async () => {
  // Entry 8 of the earlier search result is used as the lookup fixture.
  const testCompany: BusinessRecord['data']['germanParsedRegistration'] = (await resultsSearch.testResultPromise)[8]['germanParsedRegistration'];
  console.log(`trying to find specific company with:`);
  console.log(testCompany);
  const result = await testOpenDataInstance.handelsregister.getSpecificCompany(testCompany);
  console.log(result);

  // Await every write: a bare `.map(async ...)` only creates promises, so the
  // test could finish before the files land on disk and any write error
  // would go unnoticed.
  await Promise.all(
    result.files.map(async (file) => {
      await file.writeToDir('./.nogit/testoutput');
    })
  );
});
tap.test('should stop the instance', async () => {

View File

@ -3,6 +3,6 @@
*/
/**
 * Package metadata: name, version and description of this module.
 */
export const commitinfo = {
  name: '@fin.cx/opendata',
  // exactly one `version` key; a duplicate key in an object literal is a compile error
  version: '1.4.2',
  description: 'A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.'
}

View File

@ -5,12 +5,27 @@ export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<
BusinessRecord,
BusinessRecord
> {
// STATIC
/**
 * Looks up a business record by its parsed German registration.
 *
 * Delegates to `BusinessRecord.getInstance` with a filter on
 * `data.germanParsedRegistration`.
 * NOTE(review): presumably resolves to a single matching record (or nothing) — confirm against smartdata.
 *
 * @param parsedGermanRegistrationArg the parsed registration to match
 * @returns whatever `BusinessRecord.getInstance` resolves to for that filter
 */
public static getByGermanParsedRegistration = async (parsedGermanRegistrationArg: BusinessRecord['data']['germanParsedRegistration']) => {
  return await BusinessRecord.getInstance({
    data: {
      germanParsedRegistration: parsedGermanRegistrationArg,
    },
  });
};
// INSTANCE
@plugins.smartdata.unI()
id: string;
@plugins.smartdata.svDb()
data: {
name?: string;
startDate?: string;
endDate?: string;
status?: 'active' | 'liquidating' | 'closed';
address?: string;
postalCode?: string;
city?: string;
@ -42,4 +57,11 @@ export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<
purpose?: string;
lastUpdate?: string;
} = {};
/**
 * Validates the record's minimum requirements.
 * At the moment this only checks that `data.name` is set; no lookup against
 * the Handelsregister is performed here.
 *
 * @throws Error when `data.name` is missing or empty
 */
public async validate() {
  const hasName = Boolean(this.data.name);
  if (hasName) {
    return;
  }
  throw new Error('Name is required.');
}
}

View File

@ -1,13 +1,17 @@
import type { BusinessRecord } from './classes.businessrecord.js';
import type { OpenData } from './classes.main.opendata.js';
import * as plugins from './plugins.js';
import * as paths from './paths.js';
/**
* the HandelsRegister exposed as a class
*/
export class HandelsRegister {
private openDataRef: OpenData;
private asyncExecutionStack = new plugins.lik.AsyncExecutionStack();
private uniqueDowloadFolder = plugins.path.join(paths.downloadDir, plugins.smartunique.uniSimple());
// Puppeteer wrapper instance
public smartbrowserInstance = new plugins.smartbrowser.SmartBrowser();
constructor(openDataRef: OpenData) {
@ -15,16 +19,34 @@ export class HandelsRegister {
}
/**
 * Prepares this instance for use: ensures the instance's download folder
 * exists, then starts the wrapped browser.
 */
public async start() {
  const { fs } = plugins.smartfile;
  const downloadFolder = this.uniqueDowloadFolder;
  // the folder must exist before any page is configured to download into it
  await fs.ensureDir(downloadFolder);
  await this.smartbrowserInstance.start();
}
/**
 * Stops the browser and removes this instance's download folder.
 *
 * The browser is stopped first so it is never left running because the
 * folder removal failed. The removal sits in `finally` so it is attempted
 * even when stopping the browser throws; that error still propagates.
 */
public async stop() {
  try {
    // Stop the browser
    await this.smartbrowserInstance.stop();
  } finally {
    await plugins.smartfile.fs.remove(this.uniqueDowloadFolder);
  }
}
// page stuff
/**
* Creates a new page and configures it to allow file downloads
* to a predefined path.
*/
public getNewPage = async () => {
const page = await this.smartbrowserInstance.headlessBrowser.newPage();
// 1) Create a DevTools session for this page
const cdpSession = await page.target().createCDPSession();
// 2) Allow file downloads and set the download path
await cdpSession.send('Page.setDownloadBehavior', {
behavior: 'allow',
downloadPath: this.uniqueDowloadFolder, // <-- Change this to your desired absolute path
});
// Optionally set viewport and go to page
await page.setViewport({ width: 1920, height: 1080 });
await page.goto('https://www.handelsregister.de/');
return page;
@ -49,9 +71,14 @@ export class HandelsRegister {
};
private waitForResults = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
await pageArg.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
timeout: 30000,
});
await pageArg
.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
timeout: 30000,
})
.catch(async (err) => {
await pageArg.screenshot({ path: paths.downloadDir + '/error.png' });
throw err;
});
const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate(() => {
const rows = document.querySelectorAll(
@ -86,13 +113,13 @@ export class HandelsRegister {
return businessRecords;
};
private clickFindButton = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
private clickFindButton = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page, resultsLimitArg: number = 100) => {
try {
// Wait for the button with the text "Find" to appear
await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });
// adjust to 100 results per page
await pageArg.select('#form\\:ergebnisseProSeite_input', '100');
await pageArg.select('#form\\:ergebnisseProSeite_input', `${resultsLimitArg}`);
// Locate and click the button using its text
await pageArg.evaluate(() => {
@ -110,122 +137,222 @@ export class HandelsRegister {
}
};
// parsing stuff
/**
 * Triggers a file download from the first row of the results table and
 * returns the downloaded file.
 *
 * @param pageArg page that currently shows the search results table
 * @param typeArg which link to click in the last cell of the first row:
 *   'AD' clicks the first `<a>`, 'SI' clicks the last `<a>`
 * @returns the first file found in the download folder, renamed to
 *   `ad.pdf` (AD) or `si.xml` (SI)
 * @throws Error when the table, row, cell or requested link is missing,
 *   or when no file is found in the download folder afterwards
 */
private async downloadFile(
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  typeArg: 'SI' | 'AD'
) {
  // Trigger the file download by clicking on the relevant link
  await pageArg.evaluate((typeArg2) => {
    // Locate the table body
    const tableBody = document.querySelector(
      '#ergebnissForm\\:selectedSuchErgebnisFormTable_data'
    );
    if (!tableBody) {
      throw new Error('Table body not found');
    }

    // Locate the first row
    const firstRow = tableBody.querySelector('tr:nth-child(1)');
    if (!firstRow) {
      throw new Error('First row not found');
    }

    // Locate the last cell in the first row
    const lastCell = firstRow.querySelector('td:last-child');
    if (!lastCell) {
      throw new Error('Last cell not found in the first row');
    }

    // Pick the link that belongs to the requested file type
    let downloadLink: Element | null;
    switch (typeArg2) {
      case 'AD':
        downloadLink = lastCell.querySelector('a:first-of-type');
        break;
      case 'SI':
        downloadLink = lastCell.querySelector('a:last-of-type');
        break;
      default:
        throw new Error('Invalid file type');
    }

    // Report the link that was actually requested instead of always "SI"
    if (!downloadLink) {
      throw new Error(`${typeArg2} link not found in the last cell`);
    }
    (downloadLink as HTMLElement).click();
  }, typeArg);

  await plugins.smartfile.fs.waitForFileToBeReady(this.uniqueDowloadFolder);
  const files = await plugins.smartfile.fs.fileTreeToObject(this.uniqueDowloadFolder, '**/*');
  const file = files[0];
  if (!file) {
    // fail with a clear message instead of a TypeError on `file.rename`
    throw new Error(`No downloaded file found in ${this.uniqueDowloadFolder}`);
  }

  // lets clear the folder for the next download
  await plugins.smartfile.fs.ensureEmptyDir(this.uniqueDowloadFolder);

  // NOTE(review): rename runs after the folder was emptied, so it presumably
  // only changes the name of the already loaded file object; confirm.
  switch (typeArg) {
    case 'AD':
      await file.rename(`ad.pdf`);
      break;
    case 'SI':
      await file.rename(`si.xml`);
      break;
  }
  return file;
}
/**
* Helper method to parse the German registration string
*/
private async parseGermanRegistration(
input: string
): Promise<BusinessRecord['data']['germanParsedRegistration']> {
const regex = /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
// e.g. District court Berlin (Charlottenburg) HRB 123456
const regex =
/District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
const match = input.match(regex);
if (match) {
return {
court: match[1], // Extracts the court name
type: match[2] as 'HRA' | 'HRB', // Extracts the type and ensures it matches the specified types
number: match[3], // Extracts the number
court: match[1],
type: match[2] as 'HRA' | 'HRB', // Adjust if needed
number: match[3],
};
}
}
/**
* Search for a company by name
* Search for a company by name and return basic info
*/
public async searchCompany(companyNameArg: string) {
const page = await this.getNewPage();
await this.navigateToPage(page, 'Normal search');
public async searchCompany(companyNameArg: string, resultsLimitArg: number = 100) {
return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
const page = await this.getNewPage();
await this.navigateToPage(page, 'Normal search');
try {
// Wait for the textarea to appear
try {
// Wait for the textarea to appear
await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });
// Enter text into the textarea
const inputText = companyNameArg;
await page.evaluate((text) => {
const textarea = document.querySelector<HTMLTextAreaElement>('#form\\:schlagwoerter');
if (textarea) {
textarea.value = text; // Set the value
// Trigger the change event manually if required
const event = new Event('change', { bubbles: true });
textarea.dispatchEvent(event);
}
}, inputText);
console.log('Text entered successfully!');
} catch (error) {
console.error('Failed to find or enter text into the textarea:', error);
}
try {
// Wait for the radio button's label to appear
await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', { timeout: 5000 });
// Click the label to select the radio button
await page.evaluate(() => {
const label = document.querySelector<HTMLLabelElement>(
'label[for="form:schlagwortOptionen:0"]'
);
if (label) {
label.click();
}
});
console.log('Radio button clicked successfully!');
} catch (error) {
console.error('Failed to find or click the radio button:', error);
}
await this.clickFindButton(page, resultsLimitArg);
const businessRecords = await this.waitForResults(page);
// Parse out the registration info
for (const record of businessRecords) {
if (record.registrationId) {
record.germanParsedRegistration = await this.parseGermanRegistration(
record.registrationId
);
}
}
await page.close();
return businessRecords;
}, 60000);
}
/**
* Search for a specific company (known register type/number/court),
* then click on an element that triggers a file download.
*/
public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
const page = await this.getNewPage();
await this.navigateToPage(page, 'Normal search');
await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });
// Enter text into the textarea using page.evaluate
const inputText = companyNameArg;
await page.evaluate((text) => {
const textarea = document.querySelector<HTMLTextAreaElement>('#form\\:schlagwoerter');
if (textarea) {
textarea.value = text; // Set the value
// Trigger the change event manually if required
const event = new Event('change', { bubbles: true });
textarea.dispatchEvent(event);
}
}, inputText);
// 1) Type of Register (e.g. HRB, HRA, etc.)
await page.waitForSelector('#form\\:registerArt_label');
await page.click('#form\\:registerArt_label');
await page.waitForSelector('#form\\:registerArt_items');
await page.evaluate((type) => {
const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li'));
const targetOption = options.find((option) => option.textContent?.trim() === type);
(targetOption as HTMLElement)?.click();
}, companyArg.type);
console.log('Text entered successfully!');
} catch (error) {
console.error('Failed to find or enter text into the textarea:', error);
}
// 2) Register number
await page.waitForSelector('#form\\:registerNummer');
await page.type('#form\\:registerNummer', companyArg.number);
try {
// Wait for the radio button's label to appear
await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', { timeout: 5000 });
// 3) Register court
await page.waitForSelector('#form\\:registergericht_label');
await page.click('#form\\:registergericht_label');
await page.waitForSelector('#form\\:registergericht_items');
await page.evaluate((court) => {
const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li'));
const targetOption = options.find((option) => option.textContent?.trim() === court);
(targetOption as HTMLElement)?.click();
}, companyArg.court);
// Click the label to select the radio button
await page.evaluate(() => {
const label = document.querySelector<HTMLLabelElement>(
'label[for="form:schlagwortOptionen:0"]'
);
if (label) {
label.click();
}
});
// Click 'Find'
await this.clickFindButton(page);
console.log('Radio button clicked successfully!');
} catch (error) {
console.error('Failed to find or click the radio button:', error);
}
// Optionally grab the results, just for logging
const businessRecords = await this.waitForResults(page);
console.log(businessRecords);
await this.clickFindButton(page);
const files: plugins.smartfile.SmartFile[] = [];
const businessRecords = await this.waitForResults(page);
// download files
files.push(await this.downloadFile(page, 'SI'));
files.push(await this.downloadFile(page, 'AD'));
for (const record of businessRecords) {
record.germanParsedRegistration = await this.parseGermanRegistration(record.registrationId);
}
// At this point, the file should have been downloaded automatically
// to the path specified by `Page.setDownloadBehavior`
await page.close();
await page.close();
// Finally, we return an object, which triggers a JSON file download
return businessRecords;
return {
businessRecords,
files,
};
}, 60000);
}
public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
const page = await this.getNewPage();
await this.navigateToPage(page, 'Normal search');
await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });
// 1) Type of Register:
// Open the dropdown to reveal options
await page.waitForSelector('#form\\:registerArt_label');
await page.click('#form\\:registerArt_label'); // Open the dropdown
// Wait for the options and select the one matching companyArg.type
await page.waitForSelector('#form\\:registerArt_items'); // Ensure dropdown options are loaded
await page.evaluate((type) => {
const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li'));
const targetOption = options.find((option) => option.textContent?.trim() === type); // Match type dynamically
(targetOption as any)?.click();
}, companyArg.type); // Pass companyArg.type to the browser context
// 2) Register number:
// Fill in the register number
await page.waitForSelector('#form\\:registerNummer');
await page.type('#form\\:registerNummer', companyArg.number);
// 3) Register court:
// Open the dropdown for the register court
await page.waitForSelector('#form\\:registergericht_label');
await page.click('#form\\:registergericht_label'); // Open the dropdown
// Wait for the options and select the one matching companyArg.court
await page.waitForSelector('#form\\:registergericht_items'); // Ensure dropdown options are loaded
await page.evaluate((court) => {
const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li'));
const targetOption = options.find((option) => option.textContent?.trim() === court); // Match court dynamically
(targetOption as any)?.click();
}, companyArg.court); // Pass companyArg.court to the browser context
await this.clickFindButton(page);
const businessRecords = await this.waitForResults(page);
console.log(businessRecords);
/**
 * Gets a specific company by its full name.
 *
 * Runs `searchCompany` with a result limit of 1 and hands the parsed
 * registration of the first record to `getSpecificCompany`.
 *
 * @param companyNameArg the company name to search for
 * @returns the result of `getSpecificCompany` for the first search hit
 * @throws Error when the search yields no record, or the first record has
 *   no parsed registration
 */
public async getSpecificCompanyByName(companyNameArg: string) {
  const businessRecords = await this.searchCompany(companyNameArg, 1);
  // guard against an empty result list and against records whose
  // registration string could not be parsed
  const parsedRegistration = businessRecords[0]?.germanParsedRegistration;
  if (!parsedRegistration) {
    throw new Error(`No company with a parsable registration found for "${companyNameArg}".`);
  }
  return this.getSpecificCompany(parsedRegistration);
}
}

View File

@ -2,16 +2,55 @@ import * as plugins from './plugins.js';
import * as paths from './paths.js';
import type { OpenData } from './classes.main.opendata.js';
export class JsonlDataProcessor {
public openDataRef: OpenData;
constructor(openDataRefArg: OpenData) {
this.openDataRef = openDataRefArg;
/**
 * Shape of one entry as handed to the `JsonlDataProcessor` callback when it
 * is instantiated as `JsonlDataProcessor<SeedEntryType>`.
 */
export type SeedEntryType = {
  all_attributes: {
    _registerArt: string;
    _registerNummer: string;
    // one boolean flag per document type code
    additional_data: {
      AD: boolean;
      CD: boolean;
      DK: boolean;
      HD: boolean;
      SI: boolean;
      UT: boolean;
      // NOTE(review): this key was unreadable in the source (non-ASCII name lost);
      // restored as `VÖ`, presumably the remaining Handelsregister document
      // type. Verify against the seed data.
      VÖ: boolean;
    };
    federal_state: string;
    native_company_number: string;
    registered_office: string;
    registrar: string;
  };
  company_number: string;
  current_status: string;
  jurisdiction_code: string;
  name: string;
  officers: {
    name: string;
    other_attributes: {
      city: string;
      firstname: string;
      flag: string;
      lastname: string;
    };
    position: string;
    start_date: string; // ISO 8601 date string
    type: string;
  }[];
  registered_address: string;
  retrieved_at: string; // ISO 8601 date string
};
export class JsonlDataProcessor<T> {
public forEachFunction: (entryArg: T) => Promise<void>;
constructor(forEachFunctionArg: typeof this.forEachFunction) {
this.forEachFunction = forEachFunctionArg;
}
// TODO: define a mapper as argument instead of hard-coding it
public async processDataFromUrl(dataUrlArg = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2') {
public async processDataFromUrl(
dataUrlArg = 'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2'
) {
const done = plugins.smartpromise.defer();
const promiseArray: Promise<any>[] = [];
const dataExists = await plugins.smartfile.fs.isDirectory(paths.germanBusinessDataDir);
if (!dataExists) {
await plugins.smartfile.fs.ensureDir(paths.germanBusinessDataDir);
@ -19,10 +58,6 @@ export class JsonlDataProcessor {
}
const smartarchive = await plugins.smartarchive.SmartArchive.fromArchiveUrl(dataUrlArg);
promiseArray
.push
// smartarchive.exportToFs(paths.germanBusinessDataDir, 'de_companies_ocdata.jsonl')
();
const jsonlDataStream = await smartarchive.exportToStreamOfStreamFiles();
let totalRecordsCounter = 0;
let nextRest: string = '';
@ -39,44 +74,37 @@ export class JsonlDataProcessor {
const lines = currentString.split('\n');
nextRest = lines.pop();
console.log(`Got another ${lines.length} records.`);
for (const line of lines) {
let entry: any;
if (!line) continue;
try {
entry = JSON.parse(line);
console.log(JSON.stringify(entry, null, 2));
process.exit(0);
} catch (err) {
console.log(line);
await plugins.smartdelay.delayFor(10000);
}
if (!entry) continue;
totalRecordsCounter++;
if (totalRecordsCounter % 10000 === 0) console.log(`${totalRecordsCounter} total records.`);
const businessRecord = new this.openDataRef.CBusinessRecord();
businessRecord.id = await this.openDataRef.CBusinessRecord.getNewId();
businessRecord.data.name = entry.name;
await businessRecord.save();
}
const concurrentProcessor = new plugins.smartarray.ConcurrentProcessor<string>(
async (line) => {
let entry: T;
if (!line) return;
try {
entry = JSON.parse(line);
} catch (err) {
console.log(line);
await plugins.smartdelay.delayFor(10000);
}
if (!entry) return;
totalRecordsCounter++;
if (totalRecordsCounter % 10000 === 0)
console.log(`${totalRecordsCounter} total records.`);
await this.forEachFunction(entry);
},
1000
);
await concurrentProcessor.process(lines);
},
finalFunction: async (streamToolsArg) => {
console.log(`finished processing ${totalRecordsCounter} records.`);
if (!nextRest) return;
JSON.parse(nextRest);
}
done.resolve();
},
})
);
},
})
);
}
public async getBusinessRecordByName(nameArg: string) {
const businessRecord = await this.openDataRef.CBusinessRecord.getInstance({
data: {
name: { $regex: `${nameArg}`, $options: "i" } as any,
}
});
return businessRecord;
await done.promise;
}
}

View File

@ -1,27 +1,37 @@
import { BusinessRecord } from './classes.businessrecord.js';
import { HandelsRegister } from './classes.handelsregister.js';
import { JsonlDataProcessor } from './classes.jsonldata.js';
import { HandelsRegister } from './classes.handelsregister.js';
import { JsonlDataProcessor, type SeedEntryType } from './classes.jsonldata.js';
import * as paths from './paths.js';
import * as plugins from './plugins.js';
export class OpenData {
public db: plugins.smartdata.SmartdataDb;
private serviceQenv = new plugins.qenv.Qenv(paths.packageDir, paths.nogitDir);
public jsonLDataProcessor: JsonlDataProcessor;
public jsonLDataProcessor: JsonlDataProcessor<SeedEntryType>;
public handelsregister: HandelsRegister;
public CBusinessRecord = plugins.smartdata.setDefaultManagerForDoc(this, BusinessRecord);
public async start() {
this.db = new plugins.smartdata.SmartdataDb({
mongoDbUrl: await this.serviceQenv.getEnvVarOnDemand('MONGODB_URL'),
mongoDbName: await this.serviceQenv.getEnvVarOnDemand('MONGODB_NAME'),
mongoDbUser: await this.serviceQenv.getEnvVarOnDemand('MONGODB_USER'),
mongoDbPass: await this.serviceQenv.getEnvVarOnDemand('MONGODB_PASS'),
mongoDbUrl: await this.serviceQenv.getEnvVarOnDemand('MONGODB_URL'),
mongoDbName: await this.serviceQenv.getEnvVarOnDemand('MONGODB_NAME'),
mongoDbUser: await this.serviceQenv.getEnvVarOnDemand('MONGODB_USER'),
mongoDbPass: await this.serviceQenv.getEnvVarOnDemand('MONGODB_PASS'),
});
await this.db.init();
this.jsonLDataProcessor = new JsonlDataProcessor(this);
this.jsonLDataProcessor = new JsonlDataProcessor(async (entryArg) => {
const businessRecord = new this.CBusinessRecord();
businessRecord.id = await this.CBusinessRecord.getNewId();
businessRecord.data.name = entryArg.name;
businessRecord.data.germanParsedRegistration = {
court: entryArg.all_attributes.registered_office,
number: entryArg.all_attributes._registerNummer,
type: entryArg.all_attributes._registerArt as 'HRA' | 'HRB',
};
await businessRecord.save();
});
this.handelsregister = new HandelsRegister(this);
await this.handelsregister.start();
}
@ -30,8 +40,22 @@ export class OpenData {
await this.jsonLDataProcessor.processDataFromUrl();
}
/**
 * Placeholder: the body is empty, so calling this currently does nothing.
 */
public async slowValidateDb() {
}
/**
 * Placeholder: the body is empty, so calling this currently does nothing.
 */
public async validateSearchByName() {
}
/**
 * Placeholder: the body is empty, so both arguments are currently unused
 * and nothing is returned.
 */
public async searchDbByBusinessNameAndPostalCode(businessNameArg: string, postalCodeArg: string) {
}
/**
 * Closes the database connection and stops the HandelsRegister instance.
 *
 * The HandelsRegister stop sits in `finally` so it is still attempted when
 * closing the database throws; the error from `db.close()` still propagates.
 */
public async stop() {
  try {
    await this.db.close();
  } finally {
    await this.handelsregister.stop();
  }
}
}
}

View File

@ -8,4 +8,8 @@ export const packageDir = plugins.path.join(
export const nogitDir = plugins.path.join(packageDir, './.nogit/');
plugins.smartfile.fs.ensureDirSync(nogitDir);
export const downloadDir = plugins.path.join(nogitDir, 'downloads');
plugins.smartfile.fs.ensureDirSync(downloadDir);
export const germanBusinessDataDir = plugins.path.join(nogitDir, 'germanbusinessdata');

View File

@ -6,8 +6,10 @@ export {
}
// @push.rocks scope
import * as lik from '@push.rocks/lik';
import * as qenv from '@push.rocks/qenv';
import * as smartarchive from '@push.rocks/smartarchive';
import * as smartarray from '@push.rocks/smartarray';
import * as smartbrowser from '@push.rocks/smartbrowser';
import * as smartdata from '@push.rocks/smartdata';
import * as smartdelay from '@push.rocks/smartdelay';
@ -16,10 +18,14 @@ import * as smartpath from '@push.rocks/smartpath';
import * as smartpromise from '@push.rocks/smartpromise';
import * as smartrequest from '@push.rocks/smartrequest';
import * as smartstream from '@push.rocks/smartstream';
import * as smartunique from '@push.rocks/smartunique';
import * as smartxml from '@push.rocks/smartxml';
export {
lik,
qenv,
smartarchive,
smartarray,
smartbrowser,
smartdata,
smartdelay,
@ -28,6 +34,8 @@ export {
smartpromise,
smartrequest,
smartstream,
smartunique,
smartxml,
}
// @tsclass scope