Compare commits
6 Commits
Author | SHA1 | Date | |
---|---|---|---|
8ebbc16bcd | |||
c177193438 | |||
7c07bc59e4 | |||
e4a8d371f7 | |||
1c0e04cb0d | |||
c3f6ef531b |
23
changelog.md
23
changelog.md
@ -1,5 +1,28 @@
|
||||
# Changelog
|
||||
|
||||
## 2025-01-04 - 1.4.0 - feat(HandelsRegister)
|
||||
Add file download functionality to HandelsRegister
|
||||
|
||||
- Implemented file download feature in the HandelsRegister class.
|
||||
- Configured pages in Puppeteer to allow downloads and set download paths.
|
||||
- Parsed German registration information with more robust error handling.
|
||||
- Added specific methods for downloading and handling 'SI' and 'AD' files.
|
||||
|
||||
## 2025-01-03 - 1.3.1 - fix(HandelsRegister)
|
||||
Refined HandelsRegister functionality for better error handling and response capture.
|
||||
|
||||
- Improved parsing logic in parseGermanRegistration function.
|
||||
- Enhanced navigateToPage and clickFindButton methods with error messages for clarity.
|
||||
- Implemented a new responseListener to handle and log HTTP responses correctly.
|
||||
|
||||
## 2025-01-03 - 1.3.0 - feat(core)
|
||||
Enhanced data handling capabilities and improved company search functionalities.
|
||||
|
||||
- Updated business record handling to support more registration types.
|
||||
- Improved search capabilities for fetching company data with refined registration type matching.
|
||||
- Added robust logging for JSONL data processing with early exit on successful parse.
|
||||
- Reorganized test cases to include specific company data retrieval.
|
||||
|
||||
## 2025-01-02 - 1.2.1 - fix(BusinessRecord)
|
||||
Add missing field registrationType to BusinessRecord data
|
||||
|
||||
|
11
package.json
11
package.json
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@fin.cx/opendata",
|
||||
"version": "1.2.1",
|
||||
"version": "1.4.0",
|
||||
"private": false,
|
||||
"description": "A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.",
|
||||
"main": "dist_ts/index.js",
|
||||
@ -19,19 +19,22 @@
|
||||
"@git.zone/tsrun": "^1.3.3",
|
||||
"@git.zone/tstest": "^1.0.90",
|
||||
"@push.rocks/tapbundle": "^5.5.4",
|
||||
"@types/node": "^22.10.2"
|
||||
"@types/node": "^22.10.4"
|
||||
},
|
||||
"dependencies": {
|
||||
"@push.rocks/lik": "^6.1.0",
|
||||
"@push.rocks/qenv": "^6.1.0",
|
||||
"@push.rocks/smartarchive": "^4.0.39",
|
||||
"@push.rocks/smartbrowser": "^2.0.6",
|
||||
"@push.rocks/smartbrowser": "^2.0.8",
|
||||
"@push.rocks/smartdata": "^5.2.10",
|
||||
"@push.rocks/smartdelay": "^3.0.5",
|
||||
"@push.rocks/smartfile": "^11.0.23",
|
||||
"@push.rocks/smartpath": "^5.0.18",
|
||||
"@push.rocks/smartpromise": "^4.0.4",
|
||||
"@push.rocks/smartrequest": "^2.0.23",
|
||||
"@push.rocks/smartstream": "^3.2.5"
|
||||
"@push.rocks/smartstream": "^3.2.5",
|
||||
"@push.rocks/smartunique": "^3.0.9",
|
||||
"@tsclass/tsclass": "^4.2.0"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
111
pnpm-lock.yaml
generated
111
pnpm-lock.yaml
generated
@ -8,6 +8,9 @@ importers:
|
||||
|
||||
.:
|
||||
dependencies:
|
||||
'@push.rocks/lik':
|
||||
specifier: ^6.1.0
|
||||
version: 6.1.0
|
||||
'@push.rocks/qenv':
|
||||
specifier: ^6.1.0
|
||||
version: 6.1.0
|
||||
@ -15,8 +18,8 @@ importers:
|
||||
specifier: ^4.0.39
|
||||
version: 4.0.39
|
||||
'@push.rocks/smartbrowser':
|
||||
specifier: ^2.0.6
|
||||
version: 2.0.6
|
||||
specifier: ^2.0.8
|
||||
version: 2.0.8
|
||||
'@push.rocks/smartdata':
|
||||
specifier: ^5.2.10
|
||||
version: 5.2.10(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3)
|
||||
@ -38,6 +41,12 @@ importers:
|
||||
'@push.rocks/smartstream':
|
||||
specifier: ^3.2.5
|
||||
version: 3.2.5
|
||||
'@push.rocks/smartunique':
|
||||
specifier: ^3.0.9
|
||||
version: 3.0.9
|
||||
'@tsclass/tsclass':
|
||||
specifier: ^4.2.0
|
||||
version: 4.2.0
|
||||
devDependencies:
|
||||
'@git.zone/tsbuild':
|
||||
specifier: ^2.2.0
|
||||
@ -55,8 +64,8 @@ importers:
|
||||
specifier: ^5.5.4
|
||||
version: 5.5.4(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3)
|
||||
'@types/node':
|
||||
specifier: ^22.10.2
|
||||
version: 22.10.2
|
||||
specifier: ^22.10.4
|
||||
version: 22.10.4
|
||||
|
||||
packages:
|
||||
|
||||
@ -738,8 +747,8 @@ packages:
|
||||
'@push.rocks/smartarchive@4.0.39':
|
||||
resolution: {integrity: sha512-e8xOOa7h4WlZMhjEd7IjAL/wgLBS3yJ6+Q7eZognHg1cNE/TOZ1kYrAN9eo8xmTtd+37hY9NXayk2JwXdXEvyA==}
|
||||
|
||||
'@push.rocks/smartbrowser@2.0.6':
|
||||
resolution: {integrity: sha512-Ne+KCVhV/DROc1rHRRw59K6h0+LpQAK9fdOUtgDZ7laLPmB/tmnbUh3IuRDNcIY1iVA9pydoobwjnTjVgio9eQ==}
|
||||
'@push.rocks/smartbrowser@2.0.8':
|
||||
resolution: {integrity: sha512-0KWRZj3TuKo/sNwgPbiSE6WL+TMeR19t1JmXBZWh9n8iA2mpc4HhMrQAndEUdRCkx5ofSaHWojIRVFzGChj0Dg==}
|
||||
|
||||
'@push.rocks/smartbucket@3.3.7':
|
||||
resolution: {integrity: sha512-RiOuEtwHJ+HFbV1nlZgh5VuMvP6PXElX6rVe7OSQsyNCBybRQa/d1qDic92+2Ejx852DGeHlyREELQCxd/a/7w==}
|
||||
@ -1343,8 +1352,8 @@ packages:
|
||||
'@types/express-serve-static-core@4.19.6':
|
||||
resolution: {integrity: sha512-N4LZ2xG7DatVqhCZzOGb1Yi5lMbXSZcmdLDe9EzSndPV2HpWYWzRbaerl2n27irrm94EPpprqa8KpskPT085+A==}
|
||||
|
||||
'@types/express-serve-static-core@5.0.2':
|
||||
resolution: {integrity: sha512-vluaspfvWEtE4vcSDlKRNer52DvOGrB2xv6diXy6UKyKW0lqZiWHGNApSyxOv+8DE5Z27IzVvE7hNkxg7EXIcg==}
|
||||
'@types/express-serve-static-core@5.0.3':
|
||||
resolution: {integrity: sha512-JEhMNwUJt7bw728CydvYzntD0XJeTmDnvwLlbfbAhE7Tbslm/ax6bdIiUwTgeVlZTsJQPwZwKpAkyDtIjsvx3g==}
|
||||
|
||||
'@types/express@4.17.21':
|
||||
resolution: {integrity: sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==}
|
||||
@ -1437,8 +1446,8 @@ packages:
|
||||
'@types/node-forge@1.3.11':
|
||||
resolution: {integrity: sha512-FQx220y22OKNTqaByeBGqHWYz4cl94tpcxeFdvBo3wjG6XPBuZ0BNgNZRV5J5TFmmcsJ4IzsLkmGRiQbnYsBEQ==}
|
||||
|
||||
'@types/node@22.10.2':
|
||||
resolution: {integrity: sha512-Xxr6BBRCAOQixvonOye19wnzyDiUtTeqldOOmj3CkeblonbccA12PFwlufvRdrpjXxqnmUaeiU5EOA+7s5diUQ==}
|
||||
'@types/node@22.10.4':
|
||||
resolution: {integrity: sha512-99l6wv4HEzBQhvaU/UGoeBoCK61SCROQaCCGyQSgX2tEQ3rKkNZ2S7CEWnS/4s1LV+8ODdK21UeyR1fHP2mXug==}
|
||||
|
||||
'@types/parse5@6.0.3':
|
||||
resolution: {integrity: sha512-SuT16Q1K51EAVPz1K29DJ/sXjhSQ0zjvsypYJ6tlwVsRV9jwW5Adq2ch8Dq8kDBCkYnELS7N7VNCSB5nC56t/g==}
|
||||
@ -3793,8 +3802,8 @@ packages:
|
||||
symbol-tree@3.2.4:
|
||||
resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==}
|
||||
|
||||
systeminformation@5.24.1:
|
||||
resolution: {integrity: sha512-zQ5BfdVT7qwhj4bobmAv1EhXBVlXr6nOoS0OlcIigw9WkC+PmEqxLarZIyznDe4uIYmUIc87ahXRa7HGR6EGDQ==}
|
||||
systeminformation@5.24.6:
|
||||
resolution: {integrity: sha512-zxmlzFvPVxlUWmDZX1PK8iUf31/BzrDiAqiTcUwhSGw74D8VWm+ikgBTa38eb5We6o5bZHA4RsTPfYzmDbGvWQ==}
|
||||
engines: {node: '>=8.0.0'}
|
||||
os: [darwin, linux, win32, freebsd, openbsd, netbsd, sunos, android]
|
||||
hasBin: true
|
||||
@ -5089,7 +5098,7 @@ snapshots:
|
||||
'@git.zone/tsbundle': 2.1.0
|
||||
'@git.zone/tsrun': 1.3.3
|
||||
'@push.rocks/consolecolor': 2.0.2
|
||||
'@push.rocks/smartbrowser': 2.0.6
|
||||
'@push.rocks/smartbrowser': 2.0.8
|
||||
'@push.rocks/smartdelay': 3.0.5
|
||||
'@push.rocks/smartfile': 11.0.23
|
||||
'@push.rocks/smartlog': 3.0.7
|
||||
@ -5142,7 +5151,7 @@ snapshots:
|
||||
'@jest/schemas': 29.6.3
|
||||
'@types/istanbul-lib-coverage': 2.0.6
|
||||
'@types/istanbul-reports': 3.0.4
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
'@types/yargs': 17.0.33
|
||||
chalk: 4.1.2
|
||||
|
||||
@ -5355,7 +5364,7 @@ snapshots:
|
||||
tar-stream: 3.1.7
|
||||
through: 2.3.8
|
||||
|
||||
'@push.rocks/smartbrowser@2.0.6':
|
||||
'@push.rocks/smartbrowser@2.0.8':
|
||||
dependencies:
|
||||
'@push.rocks/smartdelay': 3.0.5
|
||||
'@push.rocks/smartpdf': 3.1.8
|
||||
@ -5606,7 +5615,7 @@ snapshots:
|
||||
'@types/default-gateway': 3.0.1
|
||||
isopen: 1.3.0
|
||||
public-ip: 6.0.2
|
||||
systeminformation: 5.24.1
|
||||
systeminformation: 5.24.6
|
||||
|
||||
'@push.rocks/smartnpm@2.0.4':
|
||||
dependencies:
|
||||
@ -6453,14 +6462,14 @@ snapshots:
|
||||
|
||||
'@types/accepts@1.3.7':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/babel__code-frame@7.0.6': {}
|
||||
|
||||
'@types/body-parser@1.19.5':
|
||||
dependencies:
|
||||
'@types/connect': 3.4.38
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/buffer-json@2.0.3': {}
|
||||
|
||||
@ -6476,17 +6485,17 @@ snapshots:
|
||||
|
||||
'@types/clean-css@4.2.11':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
source-map: 0.6.1
|
||||
|
||||
'@types/co-body@6.1.3':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
'@types/qs': 6.9.17
|
||||
|
||||
'@types/connect@3.4.38':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/content-disposition@0.5.8': {}
|
||||
|
||||
@ -6499,11 +6508,11 @@ snapshots:
|
||||
'@types/connect': 3.4.38
|
||||
'@types/express': 5.0.0
|
||||
'@types/keygrip': 1.0.6
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/cors@2.8.17':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/debounce@1.2.4': {}
|
||||
|
||||
@ -6517,14 +6526,14 @@ snapshots:
|
||||
|
||||
'@types/express-serve-static-core@4.19.6':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
'@types/qs': 6.9.17
|
||||
'@types/range-parser': 1.2.7
|
||||
'@types/send': 0.17.4
|
||||
|
||||
'@types/express-serve-static-core@5.0.2':
|
||||
'@types/express-serve-static-core@5.0.3':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
'@types/qs': 6.9.17
|
||||
'@types/range-parser': 1.2.7
|
||||
'@types/send': 0.17.4
|
||||
@ -6539,7 +6548,7 @@ snapshots:
|
||||
'@types/express@5.0.0':
|
||||
dependencies:
|
||||
'@types/body-parser': 1.19.5
|
||||
'@types/express-serve-static-core': 5.0.2
|
||||
'@types/express-serve-static-core': 5.0.3
|
||||
'@types/qs': 6.9.17
|
||||
'@types/serve-static': 1.15.7
|
||||
|
||||
@ -6549,30 +6558,30 @@ snapshots:
|
||||
|
||||
'@types/from2@2.3.5':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/fs-extra@11.0.4':
|
||||
dependencies:
|
||||
'@types/jsonfile': 6.1.4
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/fs-extra@9.0.13':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/glob@7.2.0':
|
||||
dependencies:
|
||||
'@types/minimatch': 5.1.2
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/glob@8.1.0':
|
||||
dependencies:
|
||||
'@types/minimatch': 5.1.2
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/gunzip-maybe@1.4.2':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/hast@3.0.4':
|
||||
dependencies:
|
||||
@ -6606,7 +6615,7 @@ snapshots:
|
||||
|
||||
'@types/jsonfile@6.1.4':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/keygrip@1.0.6': {}
|
||||
|
||||
@ -6623,7 +6632,7 @@ snapshots:
|
||||
'@types/http-errors': 2.0.4
|
||||
'@types/keygrip': 1.0.6
|
||||
'@types/koa-compose': 3.2.8
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/mdast@4.0.4':
|
||||
dependencies:
|
||||
@ -6641,9 +6650,9 @@ snapshots:
|
||||
|
||||
'@types/node-forge@1.3.11':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/node@22.10.2':
|
||||
'@types/node@22.10.4':
|
||||
dependencies:
|
||||
undici-types: 6.20.0
|
||||
|
||||
@ -6661,19 +6670,19 @@ snapshots:
|
||||
|
||||
'@types/s3rver@3.7.4':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/semver@7.5.8': {}
|
||||
|
||||
'@types/send@0.17.4':
|
||||
dependencies:
|
||||
'@types/mime': 1.3.5
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/serve-static@1.15.7':
|
||||
dependencies:
|
||||
'@types/http-errors': 2.0.4
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
'@types/send': 0.17.4
|
||||
|
||||
'@types/sinon-chai@3.2.12':
|
||||
@ -6693,15 +6702,15 @@ snapshots:
|
||||
|
||||
'@types/tar-stream@2.2.3':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/tar-stream@3.1.3':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/through2@2.0.41':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/triple-beam@1.3.5': {}
|
||||
|
||||
@ -6725,7 +6734,7 @@ snapshots:
|
||||
|
||||
'@types/whatwg-url@8.2.2':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
'@types/webidl-conversions': 7.0.3
|
||||
|
||||
'@types/which@2.0.2': {}
|
||||
@ -6734,11 +6743,11 @@ snapshots:
|
||||
|
||||
'@types/ws@7.4.7':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/ws@8.5.13':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
|
||||
'@types/yargs-parser@21.0.3': {}
|
||||
|
||||
@ -6748,7 +6757,7 @@ snapshots:
|
||||
|
||||
'@types/yauzl@2.10.3':
|
||||
dependencies:
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
optional: true
|
||||
|
||||
'@ungap/structured-clone@1.2.1': {}
|
||||
@ -7333,7 +7342,7 @@ snapshots:
|
||||
dependencies:
|
||||
'@types/cookie': 0.4.1
|
||||
'@types/cors': 2.8.17
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
accepts: 1.3.8
|
||||
base64id: 2.0.0
|
||||
cookie: 0.4.2
|
||||
@ -7499,7 +7508,7 @@ snapshots:
|
||||
|
||||
extract-zip@2.0.1:
|
||||
dependencies:
|
||||
debug: 4.4.0
|
||||
debug: 4.3.4
|
||||
get-stream: 5.2.0
|
||||
yauzl: 2.10.0
|
||||
optionalDependencies:
|
||||
@ -8041,7 +8050,7 @@ snapshots:
|
||||
jest-util@29.7.0:
|
||||
dependencies:
|
||||
'@jest/types': 29.6.3
|
||||
'@types/node': 22.10.2
|
||||
'@types/node': 22.10.4
|
||||
chalk: 4.1.2
|
||||
ci-info: 3.9.0
|
||||
graceful-fs: 4.2.11
|
||||
@ -9418,7 +9427,7 @@ snapshots:
|
||||
|
||||
symbol-tree@3.2.4: {}
|
||||
|
||||
systeminformation@5.24.1: {}
|
||||
systeminformation@5.24.6: {}
|
||||
|
||||
tar-fs@2.1.1:
|
||||
dependencies:
|
||||
|
17
test/test.ts
17
test/test.ts
@ -12,8 +12,21 @@ tap.test('should start the instance', async () => {
|
||||
await testOpenDataInstance.start();
|
||||
})
|
||||
|
||||
tap.test('should get the data for a company', async () => {
|
||||
const result = await testOpenDataInstance.handelsregister.getDataForCompany('Volkswagen');
|
||||
tap.skip.test('should build initial data', async () => {
|
||||
await testOpenDataInstance.buildInitialDb();
|
||||
});
|
||||
|
||||
const resultsSearch = tap.test('should get the data for a company', async () => {
|
||||
const result = await testOpenDataInstance.handelsregister.searchCompany('Volkswagen');
|
||||
console.log(result);
|
||||
return result;
|
||||
});
|
||||
|
||||
tap.test('should get the data for a specific company', async () => {
|
||||
const testCompany = (await resultsSearch.testResultPromise)[21]['germanParsedRegistration'];
|
||||
console.log(`trying to find specific company with:`);
|
||||
console.log(testCompany);
|
||||
const result = await testOpenDataInstance.handelsregister.getSpecificCompany(testCompany);
|
||||
console.log(result);
|
||||
});
|
||||
|
||||
|
@ -3,6 +3,6 @@
|
||||
*/
|
||||
export const commitinfo = {
|
||||
name: '@fin.cx/opendata',
|
||||
version: '1.2.1',
|
||||
version: '1.4.0',
|
||||
description: 'A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.'
|
||||
}
|
||||
|
@ -1,33 +1,45 @@
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
@plugins.smartdata.Manager()
|
||||
export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<BusinessRecord, BusinessRecord> {
|
||||
|
||||
export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc<
|
||||
BusinessRecord,
|
||||
BusinessRecord
|
||||
> {
|
||||
@plugins.smartdata.unI()
|
||||
id: string;
|
||||
|
||||
|
||||
@plugins.smartdata.svDb()
|
||||
data: {
|
||||
name?: string,
|
||||
address?: string,
|
||||
postalCode?: string,
|
||||
city?: string,
|
||||
country?: string,
|
||||
phone?: string,
|
||||
fax?: string,
|
||||
email?: string,
|
||||
website?: string,
|
||||
businessType?: string,
|
||||
registrationType?: 'HRA' | 'HRB';
|
||||
registrationNumber?: string,
|
||||
registrationCourt?: string,
|
||||
legalForm?: string,
|
||||
managingDirectors?: string[],
|
||||
boardOfDirectors?: string[],
|
||||
supervisoryBoard?: string[],
|
||||
foundingDate?: string,
|
||||
capital?: string,
|
||||
purpose?: string,
|
||||
lastUpdate?: string
|
||||
name?: string;
|
||||
address?: string;
|
||||
postalCode?: string;
|
||||
city?: string;
|
||||
country?: string;
|
||||
phone?: string;
|
||||
fax?: string;
|
||||
email?: string;
|
||||
website?: string;
|
||||
businessType?: string;
|
||||
registrationId?: string;
|
||||
germanParsedRegistration?: {
|
||||
court?: string;
|
||||
type?: 'HRA' | 'HRB' | 'GnR' | 'PR' | 'VR' | 'GsR';
|
||||
number?: string;
|
||||
};
|
||||
legalForm?:
|
||||
| 'GmbH'
|
||||
| 'GmbH & Co. KG'
|
||||
| 'AG'
|
||||
| 'LLC'
|
||||
| 'LLP'
|
||||
| 'GmbH & Co. KGaA'
|
||||
| 'GmbH & Co. KGaA, LLC';
|
||||
managingDirectors?: string[];
|
||||
boardOfDirectors?: string[];
|
||||
supervisoryBoard?: string[];
|
||||
foundingDate?: string;
|
||||
capital?: string;
|
||||
purpose?: string;
|
||||
lastUpdate?: string;
|
||||
} = {};
|
||||
}
|
||||
}
|
||||
|
@ -1,12 +1,17 @@
|
||||
import type { BusinessRecord } from './classes.businessrecord.js';
|
||||
import type { OpenData } from './classes.main.opendata.js';
|
||||
import * as plugins from './plugins.js';
|
||||
import * as paths from './paths.js';
|
||||
|
||||
/**
|
||||
* the HandelsRegister exposed as a class
|
||||
*/
|
||||
export class HandelsRegister {
|
||||
private openDataRef: OpenData;
|
||||
private asyncExecutionStack = new plugins.lik.AsyncExecutionStack();
|
||||
private uniqueDowloadFolder = plugins.path.join(paths.downloadDir, plugins.smartunique.uniSimple());
|
||||
|
||||
// Puppeteer wrapper instance
|
||||
public smartbrowserInstance = new plugins.smartbrowser.SmartBrowser();
|
||||
|
||||
constructor(openDataRef: OpenData) {
|
||||
@ -14,96 +19,72 @@ export class HandelsRegister {
|
||||
}
|
||||
|
||||
public async start() {
|
||||
// Start the browser
|
||||
await plugins.smartfile.fs.ensureDir(this.uniqueDowloadFolder);
|
||||
await this.smartbrowserInstance.start();
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
// Stop the browser
|
||||
await plugins.smartfile.fs.remove(this.uniqueDowloadFolder);
|
||||
await this.smartbrowserInstance.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for a company by name
|
||||
* Creates a new page and configures it to allow file downloads
|
||||
* to a predefined path.
|
||||
*/
|
||||
public async getDataForCompany(companyNameArg: string) {
|
||||
public getNewPage = async () => {
|
||||
const page = await this.smartbrowserInstance.headlessBrowser.newPage();
|
||||
|
||||
// 1) Create a DevTools session for this page
|
||||
const cdpSession = await page.target().createCDPSession();
|
||||
|
||||
// 2) Allow file downloads and set the download path
|
||||
await cdpSession.send('Page.setDownloadBehavior', {
|
||||
behavior: 'allow',
|
||||
downloadPath: this.uniqueDowloadFolder, // <-- Change this to your desired absolute path
|
||||
});
|
||||
|
||||
// Optionally set viewport and go to page
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
await page.goto('https://www.handelsregister.de/');
|
||||
await page.evaluate(() => {
|
||||
const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
|
||||
const targetElement = elements.find((el) => el.textContent?.trim() === 'Normal search');
|
||||
if (targetElement) {
|
||||
(targetElement as HTMLElement).click();
|
||||
}
|
||||
});
|
||||
return page;
|
||||
};
|
||||
|
||||
private navigateToPage = async (
|
||||
pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
|
||||
pageNameArg: string
|
||||
) => {
|
||||
try {
|
||||
// Wait for the textarea to appear
|
||||
await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });
|
||||
|
||||
// Enter text into the textarea using page.evaluate
|
||||
const inputText = companyNameArg;
|
||||
await page.evaluate((text) => {
|
||||
const textarea = document.querySelector<HTMLTextAreaElement>('#form\\:schlagwoerter');
|
||||
if (textarea) {
|
||||
textarea.value = text; // Set the value
|
||||
// Trigger the change event manually if required
|
||||
const event = new Event('change', { bubbles: true });
|
||||
textarea.dispatchEvent(event);
|
||||
await pageArg.evaluate((pageNameArg2) => {
|
||||
const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
|
||||
const targetElement = elements.find((el) => el.textContent?.trim() === pageNameArg2);
|
||||
if (targetElement) {
|
||||
(targetElement as HTMLElement).click();
|
||||
}
|
||||
}, inputText);
|
||||
|
||||
console.log('Text entered successfully!');
|
||||
}, pageNameArg);
|
||||
console.log(`Navigated to the ${pageNameArg} page successfully.`);
|
||||
} catch (error) {
|
||||
console.error('Failed to find or enter text into the textarea:', error);
|
||||
console.error(`Failed to navigate to the ${pageNameArg} page:`, error);
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
// Wait for the radio button's label to appear
|
||||
await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', { timeout: 5000 });
|
||||
|
||||
// Click the label to select the radio button
|
||||
await page.evaluate(() => {
|
||||
const label = document.querySelector<HTMLLabelElement>(
|
||||
'label[for="form:schlagwortOptionen:0"]'
|
||||
);
|
||||
if (label) {
|
||||
label.click();
|
||||
}
|
||||
private waitForResults = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
|
||||
await pageArg
|
||||
.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
|
||||
timeout: 30000,
|
||||
})
|
||||
.catch(async (err) => {
|
||||
await pageArg.screenshot({ path: paths.downloadDir + '/error.png' });
|
||||
throw err;
|
||||
});
|
||||
|
||||
console.log('Radio button clicked successfully!');
|
||||
} catch (error) {
|
||||
console.error('Failed to find or click the radio button:', error);
|
||||
}
|
||||
|
||||
try {
|
||||
// Wait for the button with the text "Find" to appear
|
||||
await page.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });
|
||||
|
||||
// Locate and click the button using its text
|
||||
await page.evaluate(() => {
|
||||
const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
|
||||
const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find');
|
||||
if (targetButton) {
|
||||
const parentButton = targetButton.closest('button') || targetButton;
|
||||
(parentButton as HTMLElement).click();
|
||||
}
|
||||
});
|
||||
|
||||
console.log('Find button clicked successfully!');
|
||||
} catch (error) {
|
||||
console.error('Failed to find or click the "Find" button:', error);
|
||||
}
|
||||
|
||||
await page.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
|
||||
timeout: 10000,
|
||||
});
|
||||
|
||||
const businessRecords = await page.evaluate(() => {
|
||||
const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate(() => {
|
||||
const rows = document.querySelectorAll(
|
||||
'#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr'
|
||||
);
|
||||
const records = [];
|
||||
const records: BusinessRecord['data'][] = [];
|
||||
|
||||
rows.forEach((row) => {
|
||||
const nameElement = row.querySelector('td.ui-panelgrid-cell span.marginLeft20');
|
||||
@ -116,23 +97,243 @@ export class HandelsRegister {
|
||||
const name = nameElement?.textContent?.trim();
|
||||
const city = cityElement?.textContent?.trim();
|
||||
const status = statusElement?.textContent?.trim();
|
||||
const registrationCourt = registrationCourtElement?.textContent?.trim();
|
||||
const registrationId = registrationCourtElement?.textContent?.trim();
|
||||
|
||||
// Push parsed data into records array
|
||||
records.push({
|
||||
name,
|
||||
city,
|
||||
registrationCourt,
|
||||
registrationId,
|
||||
businessType: status,
|
||||
});
|
||||
});
|
||||
|
||||
return records;
|
||||
});
|
||||
|
||||
await page.close();
|
||||
|
||||
// Finally, return the business records parsed from the result table
|
||||
return businessRecords;
|
||||
};
|
||||
|
||||
/**
 * Sets the result page size to 100 and clicks the search form's "Find" button.
 *
 * Best-effort: every failure (selector timeout, select failure, missing
 * button) is logged via console.error and NOT rethrown.
 *
 * @param pageArg page that currently shows the search form
 */
private clickFindButton = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
  try {
    // Wait for a button label span to appear
    await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });

    // adjust to 100 results per page
    await pageArg.select('#form\\:ergebnisseProSeite_input', '100');

    // Locate the button by its visible text and report back whether a click
    // actually happened, so success is only logged when it is true.
    const wasClicked = await pageArg.evaluate(() => {
      const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
      const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find');
      if (!targetButton) {
        return false;
      }
      // The text lives in a <span>; prefer the enclosing <button> when there is one.
      const parentButton = targetButton.closest('button') || targetButton;
      (parentButton as HTMLElement).click();
      return true;
    });

    if (!wasClicked) {
      // Routed through the catch below so the failure is logged like any other.
      throw new Error('No button with the text "Find" is present on the page');
    }

    console.log('Find button clicked successfully!');
  } catch (error) {
    console.error('Failed to find or click the "Find" button:', error);
  }
};
|
||||
|
||||
/**
 * Clicks the 'AD' or 'SI' link in the last cell of the first result row,
 * waits a fixed 10 seconds, then reads the download folder and empties it.
 *
 * @param pageArg page that currently shows the search result table
 * @param typeArg which link to click: 'AD' is the first <a> in the cell,
 *                'SI' is the last <a> in the cell
 * @returns the first file found in the download folder after the wait;
 *          undefined when the folder is still empty at that point
 * @throws if the result table, its first row, its last cell or the requested
 *         link cannot be found in the page
 */
private async downloadFile(
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  typeArg: 'SI' | 'AD'
) {
  // Trigger the file download by clicking on the relevant link
  await pageArg.evaluate((typeArg2) => {
    // Locate the table body
    const tableBody = document.querySelector(
      '#ergebnissForm\\:selectedSuchErgebnisFormTable_data'
    );
    if (!tableBody) {
      throw new Error('Table body not found');
    }

    // Locate the first row
    const firstRow = tableBody.querySelector('tr:nth-child(1)');
    if (!firstRow) {
      throw new Error('First row not found');
    }

    // Locate the last cell in the first row
    const lastCell = firstRow.querySelector('td:last-child');
    if (!lastCell) {
      throw new Error('Last cell not found in the first row');
    }

    // Pick the link that belongs to the requested file type
    let downloadLink: Element | null;
    switch (typeArg2) {
      case 'AD':
        downloadLink = lastCell.querySelector('a:first-of-type');
        break;
      case 'SI':
        downloadLink = lastCell.querySelector('a:last-of-type');
        break;
      default:
        throw new Error('Invalid file type');
    }
    if (!downloadLink) {
      // Name the link that was actually requested instead of always saying "SI".
      throw new Error(`${typeArg2} link not found in the last cell`);
    }

    // Simulate a click on the selected <a> element
    (downloadLink as HTMLElement).click();
  }, typeArg);

  // Wait a bit for the download to complete (you might want to implement
  // a more robust file-exists check or a wait-for-download library).
  // Uses smartdelay instead of page.waitForTimeout, which newer Puppeteer
  // releases no longer provide.
  await plugins.smartdelay.delayFor(10000);

  // Read whatever arrived, then empty the folder so the next download starts clean.
  const files = await plugins.smartfile.fs.fileTreeToObject(this.uniqueDowloadFolder, '**/*');
  await plugins.smartfile.fs.ensureEmptyDir(this.uniqueDowloadFolder);

  return files[0];
}
|
||||
|
||||
/**
 * Helper method to parse the German registration string.
 *
 * Expected shape: "District court <name> [(<district>)] <type> <number>",
 * e.g. "District court Berlin (Charlottenburg) HRB 123456".
 *
 * @param input registration text taken from a search result row
 * @returns the court, register type and register number, or undefined when
 *          the input does not match the expected shape
 */
private async parseGermanRegistration(
  input: string
): Promise<BusinessRecord['data']['germanParsedRegistration']> {
  const regex =
    /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
  const match = input.match(regex);

  if (!match) {
    return undefined;
  }

  const [, court, type, number] = match;
  return {
    court,
    // The regex admits exactly these register types, so the cast covers the
    // full union rather than only 'HRA' | 'HRB'.
    type: type as 'HRA' | 'HRB' | 'GnR' | 'PR' | 'VR' | 'GsR',
    number,
  };
}
|
||||
|
||||
/**
 * Search for a company by name and return basic info.
 *
 * Runs inside an exclusive execution slot of the async execution stack
 * (called with 60000). Filling the keyword textarea and selecting the radio
 * option are best-effort steps: their failures are logged and the search
 * continues. The page is always closed, also when a later step throws.
 *
 * @param companyNameArg text entered into the keyword textarea
 * @returns the records read from the result table; records that carry a
 *          registrationId also get `germanParsedRegistration` set
 */
public async searchCompany(companyNameArg: string) {
  return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
    const page = await this.getNewPage();

    try {
      await this.navigateToPage(page, 'Normal search');

      try {
        // Wait for the textarea to appear
        await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

        // Enter text into the textarea
        const inputText = companyNameArg;
        await page.evaluate((text) => {
          const textarea = document.querySelector<HTMLTextAreaElement>('#form\\:schlagwoerter');
          if (textarea) {
            textarea.value = text; // Set the value
            // Trigger the change event manually if required
            const event = new Event('change', { bubbles: true });
            textarea.dispatchEvent(event);
          }
        }, inputText);

        console.log('Text entered successfully!');
      } catch (error) {
        console.error('Failed to find or enter text into the textarea:', error);
      }

      try {
        // Wait for the radio button's label to appear
        await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', { timeout: 5000 });

        // Click the label to select the radio button
        await page.evaluate(() => {
          const label = document.querySelector<HTMLLabelElement>(
            'label[for="form:schlagwortOptionen:0"]'
          );
          if (label) {
            label.click();
          }
        });

        console.log('Radio button clicked successfully!');
      } catch (error) {
        console.error('Failed to find or click the radio button:', error);
      }

      await this.clickFindButton(page);

      const businessRecords = await this.waitForResults(page);

      // Parse out the registration info
      for (const record of businessRecords) {
        if (record.registrationId) {
          record.germanParsedRegistration = await this.parseGermanRegistration(
            record.registrationId
          );
        }
      }

      return businessRecords;
    } finally {
      // Close the page on every path so a failed search does not leak a tab.
      await page.close();
    }
  }, 60000);
}
|
||||
|
||||
/**
 * Search for a specific company (known register type/number/court),
 * then click the elements that trigger the 'SI' and 'AD' file downloads.
 *
 * Runs inside an exclusive execution slot of the async execution stack
 * (called with 60000). The page is always closed, also when a step throws.
 *
 * @param companyArg parsed registration (court, type, number) to search for
 * @returns the records read from the result table plus the downloaded files
 *          in the order ['SI', 'AD']
 * @throws if companyArg is missing, or if any page interaction fails
 */
public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
  if (!companyArg) {
    // Fail before a browser page is opened; the property reads below would
    // otherwise throw a bare TypeError half-way through the search.
    throw new Error('getSpecificCompany requires a parsed registration (court, type, number)');
  }

  return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
    const page = await this.getNewPage();

    try {
      await this.navigateToPage(page, 'Normal search');
      await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

      // 1) Type of Register (e.g. HRB, HRA, etc.)
      await page.waitForSelector('#form\\:registerArt_label');
      await page.click('#form\\:registerArt_label');
      await page.waitForSelector('#form\\:registerArt_items');
      await page.evaluate((type) => {
        const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li'));
        const targetOption = options.find((option) => option.textContent?.trim() === type);
        // No matching entry means nothing is clicked and the filter stays unset.
        (targetOption as HTMLElement)?.click();
      }, companyArg.type);

      // 2) Register number
      await page.waitForSelector('#form\\:registerNummer');
      await page.type('#form\\:registerNummer', companyArg.number);

      // 3) Register court
      await page.waitForSelector('#form\\:registergericht_label');
      await page.click('#form\\:registergericht_label');
      await page.waitForSelector('#form\\:registergericht_items');
      await page.evaluate((court) => {
        const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li'));
        const targetOption = options.find((option) => option.textContent?.trim() === court);
        // No matching entry means nothing is clicked and the filter stays unset.
        (targetOption as HTMLElement)?.click();
      }, companyArg.court);

      // Click 'Find'
      await this.clickFindButton(page);

      // Read the result rows; they are logged here and returned to the caller
      const businessRecords = await this.waitForResults(page);
      console.log(businessRecords);

      const files: plugins.smartfile.SmartFile[] = [];

      // Download sequentially: downloadFile empties the shared download
      // folder after each call, so the two downloads must not overlap.
      files.push(await this.downloadFile(page, 'SI'));
      files.push(await this.downloadFile(page, 'AD'));

      return {
        businessRecords,
        files,
      };
    } finally {
      // Close the page on every path so a failed lookup does not leak a tab.
      await page.close();
    }
  }, 60000);
}
|
||||
}
|
||||
|
@ -44,6 +44,8 @@ export class JsonlDataProcessor {
|
||||
if (!line) continue;
|
||||
try {
|
||||
entry = JSON.parse(line);
|
||||
console.log(JSON.stringify(entry, null, 2));
|
||||
process.exit(0);
|
||||
} catch (err) {
|
||||
console.log(line);
|
||||
await plugins.smartdelay.delayFor(10000);
|
||||
|
@ -25,6 +25,11 @@ export class OpenData {
|
||||
this.handelsregister = new HandelsRegister(this);
|
||||
await this.handelsregister.start();
|
||||
}
|
||||
|
||||
public async buildInitialDb() {
|
||||
await this.jsonLDataProcessor.processDataFromUrl();
|
||||
}
|
||||
|
||||
public async stop() {
|
||||
await this.db.close();
|
||||
await this.handelsregister.stop();
|
||||
|
@ -8,4 +8,8 @@ export const packageDir = plugins.path.join(
|
||||
export const nogitDir = plugins.path.join(packageDir, './.nogit/');
|
||||
plugins.smartfile.fs.ensureDirSync(nogitDir);
|
||||
|
||||
export const downloadDir = plugins.path.join(nogitDir, 'downloads');
|
||||
plugins.smartfile.fs.ensureDirSync(downloadDir);
|
||||
|
||||
|
||||
export const germanBusinessDataDir = plugins.path.join(nogitDir, 'germanbusinessdata');
|
@ -6,6 +6,7 @@ export {
|
||||
}
|
||||
|
||||
// @push.rocks scope
|
||||
import * as lik from '@push.rocks/lik';
|
||||
import * as qenv from '@push.rocks/qenv';
|
||||
import * as smartarchive from '@push.rocks/smartarchive';
|
||||
import * as smartbrowser from '@push.rocks/smartbrowser';
|
||||
@ -16,8 +17,10 @@ import * as smartpath from '@push.rocks/smartpath';
|
||||
import * as smartpromise from '@push.rocks/smartpromise';
|
||||
import * as smartrequest from '@push.rocks/smartrequest';
|
||||
import * as smartstream from '@push.rocks/smartstream';
|
||||
import * as smartunique from '@push.rocks/smartunique';
|
||||
|
||||
export {
|
||||
lik,
|
||||
qenv,
|
||||
smartarchive,
|
||||
smartbrowser,
|
||||
@ -28,4 +31,12 @@ export {
|
||||
smartpromise,
|
||||
smartrequest,
|
||||
smartstream,
|
||||
}
|
||||
smartunique,
|
||||
}
|
||||
|
||||
// @tsclass scope
|
||||
import * as tsclass from '@tsclass/tsclass';
|
||||
|
||||
export {
|
||||
tsclass,
|
||||
}
|
||||
|
Reference in New Issue
Block a user