From c3f6ef531b74a46c5b3d816e9dd77cbff93dca86 Mon Sep 17 00:00:00 2001 From: Philipp Kunz Date: Fri, 3 Jan 2025 01:36:26 +0100 Subject: [PATCH] feat(core): Enhanced data handling capabilities and improved company search functionalities. --- changelog.md | 8 ++ package.json | 7 +- pnpm-lock.yaml | 105 +++++++++-------- test/test.ts | 17 ++- ts/00_commitinfo_data.ts | 2 +- ts/classes.businessrecord.ts | 62 ++++++---- ts/classes.handelsregister.ts | 213 ++++++++++++++++++++++++---------- ts/classes.jsonldata.ts | 2 + ts/classes.main.opendata.ts | 5 + ts/plugins.ts | 9 +- 10 files changed, 287 insertions(+), 143 deletions(-) diff --git a/changelog.md b/changelog.md index 50f1fb2..3411ed3 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,13 @@ # Changelog +## 2025-01-03 - 1.3.0 - feat(core) +Enhanced data handling capabilities and improved company search functionalities. + +- Updated business record handling to support more registration types. +- Improved search capabilities for fetching company data with refined registration type matching. +- Added robust logging for JSONL data processing with early exit on successful parse. +- Reorganized test cases to include specific company data retrieval. + ## 2025-01-02 - 1.2.1 - fix(BusinessRecord) Add missing field registrationType to BusinessRecord data diff --git a/package.json b/package.json index 6c8ec2b..3e83484 100644 --- a/package.json +++ b/package.json @@ -19,19 +19,20 @@ "@git.zone/tsrun": "^1.3.3", "@git.zone/tstest": "^1.0.90", "@push.rocks/tapbundle": "^5.5.4", - "@types/node": "^22.10.2" + "@types/node": "^22.10.4" }, "dependencies": { "@push.rocks/qenv": "^6.1.0", "@push.rocks/smartarchive": "^4.0.39", - "@push.rocks/smartbrowser": "^2.0.6", + "@push.rocks/smartbrowser": "^2.0.8", "@push.rocks/smartdata": "^5.2.10", "@push.rocks/smartdelay": "^3.0.5", "@push.rocks/smartfile": "^11.0.23", "@push.rocks/smartpath": "^5.0.18", "@push.rocks/smartpromise": "^4.0.4", "@push.rocks/smartrequest": "^2.0.23", - "@push.rocks/smartstream": "^3.2.5" + "@push.rocks/smartstream": "^3.2.5", + "@tsclass/tsclass": "^4.2.0" }, "repository": { "type": "git", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2b667ea..4787c15 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -15,8 +15,8 @@ importers: specifier: ^4.0.39 version: 4.0.39 '@push.rocks/smartbrowser': - specifier: ^2.0.6 - version: 2.0.6 + specifier: ^2.0.8 + version: 2.0.8 '@push.rocks/smartdata': specifier: ^5.2.10 version: 5.2.10(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3) @@ -38,6 +38,9 @@ importers: '@push.rocks/smartstream': specifier: ^3.2.5 version: 3.2.5 + '@tsclass/tsclass': + specifier: ^4.2.0 + version: 4.2.0 devDependencies: '@git.zone/tsbuild': specifier: ^2.2.0 @@ -55,8 +58,8 @@ importers: specifier: ^5.5.4 version: 5.5.4(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0))(@aws-sdk/credential-providers@3.716.0(@aws-sdk/client-sso-oidc@3.716.0(@aws-sdk/client-sts@3.716.0)))(socks@2.8.3) '@types/node': - specifier: ^22.10.2 - version: 22.10.2 + specifier: ^22.10.4 + version: 22.10.4 packages: @@ -738,8 +741,8 @@ packages: '@push.rocks/smartarchive@4.0.39': resolution: {integrity: sha512-e8xOOa7h4WlZMhjEd7IjAL/wgLBS3yJ6+Q7eZognHg1cNE/TOZ1kYrAN9eo8xmTtd+37hY9NXayk2JwXdXEvyA==} - '@push.rocks/smartbrowser@2.0.6': - resolution: {integrity: sha512-Ne+KCVhV/DROc1rHRRw59K6h0+LpQAK9fdOUtgDZ7laLPmB/tmnbUh3IuRDNcIY1iVA9pydoobwjnTjVgio9eQ==} + '@push.rocks/smartbrowser@2.0.8': + resolution: {integrity: sha512-0KWRZj3TuKo/sNwgPbiSE6WL+TMeR19t1JmXBZWh9n8iA2mpc4HhMrQAndEUdRCkx5ofSaHWojIRVFzGChj0Dg==} '@push.rocks/smartbucket@3.3.7': resolution: {integrity: sha512-RiOuEtwHJ+HFbV1nlZgh5VuMvP6PXElX6rVe7OSQsyNCBybRQa/d1qDic92+2Ejx852DGeHlyREELQCxd/a/7w==} @@ -1343,8 +1346,8 @@ packages: '@types/express-serve-static-core@4.19.6': resolution: {integrity: sha512-N4LZ2xG7DatVqhCZzOGb1Yi5lMbXSZcmdLDe9EzSndPV2HpWYWzRbaerl2n27irrm94EPpprqa8KpskPT085+A==} - '@types/express-serve-static-core@5.0.2': - resolution: {integrity: sha512-vluaspfvWEtE4vcSDlKRNer52DvOGrB2xv6diXy6UKyKW0lqZiWHGNApSyxOv+8DE5Z27IzVvE7hNkxg7EXIcg==} + '@types/express-serve-static-core@5.0.3': + resolution: {integrity: sha512-JEhMNwUJt7bw728CydvYzntD0XJeTmDnvwLlbfbAhE7Tbslm/ax6bdIiUwTgeVlZTsJQPwZwKpAkyDtIjsvx3g==} '@types/express@4.17.21': resolution: {integrity: sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==} @@ -1437,8 +1440,8 @@ packages: '@types/node-forge@1.3.11': resolution: {integrity: sha512-FQx220y22OKNTqaByeBGqHWYz4cl94tpcxeFdvBo3wjG6XPBuZ0BNgNZRV5J5TFmmcsJ4IzsLkmGRiQbnYsBEQ==} - '@types/node@22.10.2': - resolution: {integrity: sha512-Xxr6BBRCAOQixvonOye19wnzyDiUtTeqldOOmj3CkeblonbccA12PFwlufvRdrpjXxqnmUaeiU5EOA+7s5diUQ==} + '@types/node@22.10.4': + resolution: {integrity: sha512-99l6wv4HEzBQhvaU/UGoeBoCK61SCROQaCCGyQSgX2tEQ3rKkNZ2S7CEWnS/4s1LV+8ODdK21UeyR1fHP2mXug==} '@types/parse5@6.0.3': resolution: {integrity: sha512-SuT16Q1K51EAVPz1K29DJ/sXjhSQ0zjvsypYJ6tlwVsRV9jwW5Adq2ch8Dq8kDBCkYnELS7N7VNCSB5nC56t/g==} @@ -3793,8 +3796,8 @@ packages: symbol-tree@3.2.4: resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==} - systeminformation@5.24.1: - resolution: {integrity: sha512-zQ5BfdVT7qwhj4bobmAv1EhXBVlXr6nOoS0OlcIigw9WkC+PmEqxLarZIyznDe4uIYmUIc87ahXRa7HGR6EGDQ==} + systeminformation@5.24.6: + resolution: {integrity: sha512-zxmlzFvPVxlUWmDZX1PK8iUf31/BzrDiAqiTcUwhSGw74D8VWm+ikgBTa38eb5We6o5bZHA4RsTPfYzmDbGvWQ==} engines: {node: '>=8.0.0'} os: [darwin, linux, win32, freebsd, openbsd, netbsd, sunos, android] hasBin: true @@ -5089,7 +5092,7 @@ snapshots: '@git.zone/tsbundle': 2.1.0 '@git.zone/tsrun': 1.3.3 '@push.rocks/consolecolor': 2.0.2 - '@push.rocks/smartbrowser': 2.0.6 + '@push.rocks/smartbrowser': 2.0.8 '@push.rocks/smartdelay': 3.0.5 '@push.rocks/smartfile': 11.0.23 '@push.rocks/smartlog': 3.0.7 @@ -5142,7 +5145,7 @@ snapshots: '@jest/schemas': 29.6.3 '@types/istanbul-lib-coverage': 2.0.6 '@types/istanbul-reports': 3.0.4 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/yargs': 17.0.33 chalk: 4.1.2 @@ -5355,7 +5358,7 @@ snapshots: tar-stream: 3.1.7 through: 2.3.8 - '@push.rocks/smartbrowser@2.0.6': + '@push.rocks/smartbrowser@2.0.8': dependencies: '@push.rocks/smartdelay': 3.0.5 '@push.rocks/smartpdf': 3.1.8 @@ -5606,7 +5609,7 @@ snapshots: '@types/default-gateway': 3.0.1 isopen: 1.3.0 public-ip: 6.0.2 - systeminformation: 5.24.1 + systeminformation: 5.24.6 '@push.rocks/smartnpm@2.0.4': dependencies: @@ -6453,14 +6456,14 @@ snapshots: '@types/accepts@1.3.7': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/babel__code-frame@7.0.6': {} '@types/body-parser@1.19.5': dependencies: '@types/connect': 3.4.38 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/buffer-json@2.0.3': {} @@ -6476,17 +6479,17 @@ snapshots: '@types/clean-css@4.2.11': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 source-map: 0.6.1 '@types/co-body@6.1.3': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/qs': 6.9.17 '@types/connect@3.4.38': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/content-disposition@0.5.8': {} @@ -6499,11 +6502,11 @@ snapshots: '@types/connect': 3.4.38 '@types/express': 5.0.0 '@types/keygrip': 1.0.6 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/cors@2.8.17': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/debounce@1.2.4': {} @@ -6517,14 +6520,14 @@ snapshots: '@types/express-serve-static-core@4.19.6': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/qs': 6.9.17 '@types/range-parser': 1.2.7 '@types/send': 0.17.4 - '@types/express-serve-static-core@5.0.2': + '@types/express-serve-static-core@5.0.3': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/qs': 6.9.17 '@types/range-parser': 1.2.7 '@types/send': 0.17.4 @@ -6539,7 +6542,7 @@ snapshots: '@types/express@5.0.0': dependencies: '@types/body-parser': 1.19.5 - '@types/express-serve-static-core': 5.0.2 + '@types/express-serve-static-core': 5.0.3 '@types/qs': 6.9.17 '@types/serve-static': 1.15.7 @@ -6549,30 +6552,30 @@ snapshots: '@types/from2@2.3.5': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/fs-extra@11.0.4': dependencies: '@types/jsonfile': 6.1.4 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/fs-extra@9.0.13': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/glob@7.2.0': dependencies: '@types/minimatch': 5.1.2 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/glob@8.1.0': dependencies: '@types/minimatch': 5.1.2 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/gunzip-maybe@1.4.2': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/hast@3.0.4': dependencies: @@ -6606,7 +6609,7 @@ snapshots: '@types/jsonfile@6.1.4': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/keygrip@1.0.6': {} @@ -6623,7 +6626,7 @@ snapshots: '@types/http-errors': 2.0.4 '@types/keygrip': 1.0.6 '@types/koa-compose': 3.2.8 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/mdast@4.0.4': dependencies: @@ -6641,9 +6644,9 @@ snapshots: '@types/node-forge@1.3.11': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 - '@types/node@22.10.2': + '@types/node@22.10.4': dependencies: undici-types: 6.20.0 @@ -6661,19 +6664,19 @@ snapshots: '@types/s3rver@3.7.4': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/semver@7.5.8': {} '@types/send@0.17.4': dependencies: '@types/mime': 1.3.5 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/serve-static@1.15.7': dependencies: '@types/http-errors': 2.0.4 - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/send': 0.17.4 '@types/sinon-chai@3.2.12': @@ -6693,15 +6696,15 @@ snapshots: '@types/tar-stream@2.2.3': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/tar-stream@3.1.3': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/through2@2.0.41': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/triple-beam@1.3.5': {} @@ -6725,7 +6728,7 @@ snapshots: '@types/whatwg-url@8.2.2': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/webidl-conversions': 7.0.3 '@types/which@2.0.2': {} @@ -6734,11 +6737,11 @@ snapshots: '@types/ws@7.4.7': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/ws@8.5.13': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 '@types/yargs-parser@21.0.3': {} @@ -6748,7 +6751,7 @@ snapshots: '@types/yauzl@2.10.3': dependencies: - '@types/node': 22.10.2 + '@types/node': 22.10.4 optional: true '@ungap/structured-clone@1.2.1': {} @@ -7333,7 +7336,7 @@ snapshots: dependencies: '@types/cookie': 0.4.1 '@types/cors': 2.8.17 - '@types/node': 22.10.2 + '@types/node': 22.10.4 accepts: 1.3.8 base64id: 2.0.0 cookie: 0.4.2 @@ -7499,7 +7502,7 @@ snapshots: extract-zip@2.0.1: dependencies: - debug: 4.4.0 + debug: 4.3.4 get-stream: 5.2.0 yauzl: 2.10.0 optionalDependencies: @@ -8041,7 +8044,7 @@ snapshots: jest-util@29.7.0: dependencies: '@jest/types': 29.6.3 - '@types/node': 22.10.2 + '@types/node': 22.10.4 chalk: 4.1.2 ci-info: 3.9.0 graceful-fs: 4.2.11 @@ -9418,7 +9421,7 @@ snapshots: symbol-tree@3.2.4: {} - systeminformation@5.24.1: {} + systeminformation@5.24.6: {} tar-fs@2.1.1: dependencies: diff --git a/test/test.ts b/test/test.ts index 1bdc60c..ac0eb48 100644 --- a/test/test.ts +++ b/test/test.ts @@ -12,8 +12,21 @@ tap.test('should start the instance', async () => { await testOpenDataInstance.start(); }) -tap.test('should get the data for a company', async () => { - const result = await testOpenDataInstance.handelsregister.getDataForCompany('Volkswagen'); +tap.skip.test('should build initial data', async () => { + await testOpenDataInstance.buildInitialDb(); +}); + +const resultsSearch = tap.test('should get the data for a company', async () => { + const result = await testOpenDataInstance.handelsregister.searchCompany('Volkswagen'); + console.log(result); + return result; +}); + +tap.test('should get the data for a specific company', async () => { + const testCompany = (await resultsSearch.testResultPromise)[21]['germanParsedRegistration']; + console.log(`trying to find specific company with:`); + console.log(testCompany); + const result = await testOpenDataInstance.handelsregister.getSpecificCompany(testCompany); console.log(result); }); diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index 231b365..c38cd55 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@fin.cx/opendata', - version: '1.2.1', + version: '1.3.0', description: 'A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.' } diff --git a/ts/classes.businessrecord.ts b/ts/classes.businessrecord.ts index f7e0fb3..cab89d1 100644 --- a/ts/classes.businessrecord.ts +++ b/ts/classes.businessrecord.ts @@ -1,33 +1,45 @@ import * as plugins from './plugins.js'; @plugins.smartdata.Manager() -export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc { - +export class BusinessRecord extends plugins.smartdata.SmartDataDbDoc< + BusinessRecord, + BusinessRecord +> { @plugins.smartdata.unI() id: string; - + @plugins.smartdata.svDb() data: { - name?: string, - address?: string, - postalCode?: string, - city?: string, - country?: string, - phone?: string, - fax?: string, - email?: string, - website?: string, - businessType?: string, - registrationType?: 'HRA' | 'HRB'; - registrationNumber?: string, - registrationCourt?: string, - legalForm?: string, - managingDirectors?: string[], - boardOfDirectors?: string[], - supervisoryBoard?: string[], - foundingDate?: string, - capital?: string, - purpose?: string, - lastUpdate?: string + name?: string; + address?: string; + postalCode?: string; + city?: string; + country?: string; + phone?: string; + fax?: string; + email?: string; + website?: string; + businessType?: string; + registrationId?: string; + germanParsedRegistration?: { + court?: string; + type?: 'HRA' | 'HRB' | 'GnR' | 'PR' | 'VR' | 'GsR'; + number?: string; + }; + legalForm?: + | 'GmbH' + | 'GmbH & Co. KG' + | 'AG' + | 'LLC' + | 'LLP' + | 'GmbH & Co. KGaA' + | 'GmbH & Co. KGaA, LLC'; + managingDirectors?: string[]; + boardOfDirectors?: string[]; + supervisoryBoard?: string[]; + foundingDate?: string; + capital?: string; + purpose?: string; + lastUpdate?: string; } = {}; -} \ No newline at end of file +} diff --git a/ts/classes.handelsregister.ts b/ts/classes.handelsregister.ts index 76854ef..a46a035 100644 --- a/ts/classes.handelsregister.ts +++ b/ts/classes.handelsregister.ts @@ -1,3 +1,4 @@ +import type { BusinessRecord } from './classes.businessrecord.js'; import type { OpenData } from './classes.main.opendata.js'; import * as plugins from './plugins.js'; @@ -21,21 +22,117 @@ export class HandelsRegister { await this.smartbrowserInstance.stop(); } - /** - * Search for a company by name - */ - public async getDataForCompany(companyNameArg: string) { + // page stuff + public getNewPage = async () => { const page = await this.smartbrowserInstance.headlessBrowser.newPage(); await page.setViewport({ width: 1920, height: 1080 }); await page.goto('https://www.handelsregister.de/'); - await page.evaluate(() => { - const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span')); - const targetElement = elements.find((el) => el.textContent?.trim() === 'Normal search'); - if (targetElement) { - (targetElement as HTMLElement).click(); - } + return page; + }; + + private navigateToPage = async ( + pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page, + pageNameArg: string + ) => { + try { + await pageArg.evaluate((pageNameArg2) => { + const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span')); + const targetElement = elements.find((el) => el.textContent?.trim() === pageNameArg2); + if (targetElement) { + (targetElement as HTMLElement).click(); + } + }, pageNameArg); + console.log(`Navigated to the ${pageNameArg} page successfully.`); + } catch (error) { + console.error(`Failed to navigate to the ${pageNameArg} page:`, error); + } + }; + + private waitForResults = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => { + await pageArg.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', { + timeout: 30000, }); + const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate(() => { + const rows = document.querySelectorAll( + '#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr' + ); + const records: BusinessRecord['data'][] = []; + + rows.forEach((row) => { + const nameElement = row.querySelector('td.ui-panelgrid-cell span.marginLeft20'); + const cityElement = row.querySelector('td.ui-panelgrid-cell.sitzSuchErgebnisse span'); + const statusElement = row.querySelector('td.ui-panelgrid-cell span.verticalText'); + const registrationCourtElement = row.querySelector( + 'td.ui-panelgrid-cell.fontTableNameSize' + ); + + const name = nameElement?.textContent?.trim(); + const city = cityElement?.textContent?.trim(); + const status = statusElement?.textContent?.trim(); + const registrationId = registrationCourtElement?.textContent?.trim(); + + // Push parsed data into records array + records.push({ + name, + city, + registrationId, + businessType: status, + }); + }); + + return records; + }); + return businessRecords; + }; + + private clickFindButton = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => { + try { + // Wait for the button with the text "Find" to appear + await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 }); + + // adjust to 100 results per page + await pageArg.select('#form\\:ergebnisseProSeite_input', '100'); + + // Locate and click the button using its text + await pageArg.evaluate(() => { + const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c')); + const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find'); + if (targetButton) { + const parentButton = targetButton.closest('button') || targetButton; + (parentButton as HTMLElement).click(); + } + }); + + console.log('Find button clicked successfully!'); + } catch (error) { + console.error('Failed to find or click the "Find" button:', error); + } + }; + + // parsing stuff + private async parseGermanRegistration( + input: string + ): Promise { + const regex = /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u; + const match = input.match(regex); + + if (match) { + return { + court: match[1], // Extracts the court name + type: match[2] as 'HRA' | 'HRB', // Extracts the type and ensures it matches the specified types + number: match[3], // Extracts the number + }; + } + } + + /** + * Search for a company by name + */ + public async searchCompany(companyNameArg: string) { + const page = await this.getNewPage(); + await this.navigateToPage(page, 'Normal search'); + try { // Wait for the textarea to appear await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 }); @@ -76,63 +173,59 @@ export class HandelsRegister { console.error('Failed to find or click the radio button:', error); } - try { - // Wait for the button with the text "Find" to appear - await page.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 }); + await this.clickFindButton(page); - // Locate and click the button using its text - await page.evaluate(() => { - const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c')); - const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find'); - if (targetButton) { - const parentButton = targetButton.closest('button') || targetButton; - (parentButton as HTMLElement).click(); - } - }); + const businessRecords = await this.waitForResults(page); - console.log('Find button clicked successfully!'); - } catch (error) { - console.error('Failed to find or click the "Find" button:', error); + for (const record of businessRecords) { + record.germanParsedRegistration = await this.parseGermanRegistration(record.registrationId); } - await page.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', { - timeout: 10000, - }); - - const businessRecords = await page.evaluate(() => { - const rows = document.querySelectorAll( - '#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr' - ); - const records = []; - - rows.forEach((row) => { - const nameElement = row.querySelector('td.ui-panelgrid-cell span.marginLeft20'); - const cityElement = row.querySelector('td.ui-panelgrid-cell.sitzSuchErgebnisse span'); - const statusElement = row.querySelector('td.ui-panelgrid-cell span.verticalText'); - const registrationCourtElement = row.querySelector( - 'td.ui-panelgrid-cell.fontTableNameSize' - ); - - const name = nameElement?.textContent?.trim(); - const city = cityElement?.textContent?.trim(); - const status = statusElement?.textContent?.trim(); - const registrationCourt = registrationCourtElement?.textContent?.trim(); - - // Push parsed data into records array - records.push({ - name, - city, - registrationCourt, - businessType: status, - }); - }); - - return records; - }); - await page.close(); // Finally, we return an object, which triggers a JSON file download return businessRecords; } + + public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) { + const page = await this.getNewPage(); + await this.navigateToPage(page, 'Normal search'); + await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 }); + + // 1) Type of Register: +// Open the dropdown to reveal options +await page.waitForSelector('#form\\:registerArt_label'); +await page.click('#form\\:registerArt_label'); // Open the dropdown + +// Wait for the options and select the one matching companyArg.type +await page.waitForSelector('#form\\:registerArt_items'); // Ensure dropdown options are loaded +await page.evaluate((type) => { + const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li')); + const targetOption = options.find((option) => option.textContent?.trim() === type); // Match type dynamically + (targetOption as any)?.click(); +}, companyArg.type); // Pass companyArg.type to the browser context + +// 2) Register number: +// Fill in the register number +await page.waitForSelector('#form\\:registerNummer'); +await page.type('#form\\:registerNummer', companyArg.number); + +// 3) Register court: +// Open the dropdown for the register court +await page.waitForSelector('#form\\:registergericht_label'); +await page.click('#form\\:registergericht_label'); // Open the dropdown + +// Wait for the options and select the one matching companyArg.court +await page.waitForSelector('#form\\:registergericht_items'); // Ensure dropdown options are loaded +await page.evaluate((court) => { + const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li')); + const targetOption = options.find((option) => option.textContent?.trim() === court); // Match court dynamically + (targetOption as any)?.click(); +}, companyArg.court); // Pass companyArg.court to the browser context + + await this.clickFindButton(page); + + const businessRecords = await this.waitForResults(page); + console.log(businessRecords); + } } diff --git a/ts/classes.jsonldata.ts b/ts/classes.jsonldata.ts index f934c59..e728ad3 100644 --- a/ts/classes.jsonldata.ts +++ b/ts/classes.jsonldata.ts @@ -44,6 +44,8 @@ export class JsonlDataProcessor { if (!line) continue; try { entry = JSON.parse(line); + console.log(JSON.stringify(entry, null, 2)); + process.exit(0); } catch (err) { console.log(line); await plugins.smartdelay.delayFor(10000); diff --git a/ts/classes.main.opendata.ts b/ts/classes.main.opendata.ts index 55a138d..f2d1e50 100644 --- a/ts/classes.main.opendata.ts +++ b/ts/classes.main.opendata.ts @@ -25,6 +25,11 @@ export class OpenData { this.handelsregister = new HandelsRegister(this); await this.handelsregister.start(); } + + public async buildInitialDb() { + await this.jsonLDataProcessor.processDataFromUrl(); + } + public async stop() { await this.db.close(); await this.handelsregister.stop(); diff --git a/ts/plugins.ts b/ts/plugins.ts index 82cadd5..ce80539 100644 --- a/ts/plugins.ts +++ b/ts/plugins.ts @@ -28,4 +28,11 @@ export { smartpromise, smartrequest, smartstream, -} \ No newline at end of file +} + +// @tsclass scope +import * as tsclass from '@tsclass/tsclass'; + +export { + tsclass, +}