From e4a8d371f7a650cd2f6119ac8471ee16366c20cc Mon Sep 17 00:00:00 2001 From: Philipp Kunz Date: Fri, 3 Jan 2025 02:19:07 +0100 Subject: [PATCH] fix(HandelsRegister): Refined HandelsRegister functionality for better error handling and response capture. --- changelog.md | 7 +++ ts/00_commitinfo_data.ts | 2 +- ts/classes.handelsregister.ts | 115 ++++++++++++++++++++++++++-------- 3 files changed, 97 insertions(+), 27 deletions(-) diff --git a/changelog.md b/changelog.md index 3411ed3..f56dab5 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,12 @@ # Changelog +## 2025-01-03 - 1.3.1 - fix(HandelsRegister) +Refined HandelsRegister functionality for better error handling and response capture. + +- Improved parsing logic in parseGermanRegistration function. +- Enhanced navigateToPage and clickFindButton methods with error messages for clarity. +- Implemented a new responseListener to handle and log HTTP responses correctly. + ## 2025-01-03 - 1.3.0 - feat(core) Enhanced data handling capabilities and improved company search functionalities. diff --git a/ts/00_commitinfo_data.ts b/ts/00_commitinfo_data.ts index c38cd55..0b71923 100644 --- a/ts/00_commitinfo_data.ts +++ b/ts/00_commitinfo_data.ts @@ -3,6 +3,6 @@ */ export const commitinfo = { name: '@fin.cx/opendata', - version: '1.3.0', + version: '1.3.1', description: 'A TypeScript library for accessing, managing, and updating open business data, focused on German companies and integrating with MongoDB.' } diff --git a/ts/classes.handelsregister.ts b/ts/classes.handelsregister.ts index a46a035..11bb7c1 100644 --- a/ts/classes.handelsregister.ts +++ b/ts/classes.handelsregister.ts @@ -114,7 +114,8 @@ export class HandelsRegister { private async parseGermanRegistration( input: string ): Promise { - const regex = /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u; + const regex = + /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u; const match = input.match(regex); if (match) { @@ -193,39 +194,101 @@ export class HandelsRegister { await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 }); // 1) Type of Register: -// Open the dropdown to reveal options -await page.waitForSelector('#form\\:registerArt_label'); -await page.click('#form\\:registerArt_label'); // Open the dropdown + // Open the dropdown to reveal options + await page.waitForSelector('#form\\:registerArt_label'); + await page.click('#form\\:registerArt_label'); // Open the dropdown -// Wait for the options and select the one matching companyArg.type -await page.waitForSelector('#form\\:registerArt_items'); // Ensure dropdown options are loaded -await page.evaluate((type) => { - const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li')); - const targetOption = options.find((option) => option.textContent?.trim() === type); // Match type dynamically - (targetOption as any)?.click(); -}, companyArg.type); // Pass companyArg.type to the browser context + // Wait for the options and select the one matching companyArg.type + await page.waitForSelector('#form\\:registerArt_items'); // Ensure dropdown options are loaded + await page.evaluate((type) => { + const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li')); + const targetOption = options.find((option) => option.textContent?.trim() === type); // Match type dynamically + (targetOption as any)?.click(); + }, companyArg.type); // Pass companyArg.type to the browser context -// 2) Register number: -// Fill in the register number -await page.waitForSelector('#form\\:registerNummer'); -await page.type('#form\\:registerNummer', companyArg.number); + // 2) Register number: + // Fill in the register number + await page.waitForSelector('#form\\:registerNummer'); + await page.type('#form\\:registerNummer', companyArg.number); -// 3) Register court: -// Open the dropdown for the register court -await page.waitForSelector('#form\\:registergericht_label'); -await page.click('#form\\:registergericht_label'); // Open the dropdown + // 3) Register court: + // Open the dropdown for the register court + await page.waitForSelector('#form\\:registergericht_label'); + await page.click('#form\\:registergericht_label'); // Open the dropdown -// Wait for the options and select the one matching companyArg.court -await page.waitForSelector('#form\\:registergericht_items'); // Ensure dropdown options are loaded -await page.evaluate((court) => { - const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li')); - const targetOption = options.find((option) => option.textContent?.trim() === court); // Match court dynamically - (targetOption as any)?.click(); -}, companyArg.court); // Pass companyArg.court to the browser context + // Wait for the options and select the one matching companyArg.court + await page.waitForSelector('#form\\:registergericht_items'); // Ensure dropdown options are loaded + await page.evaluate((court) => { + const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li')); + const targetOption = options.find((option) => option.textContent?.trim() === court); // Match court dynamically + (targetOption as any)?.click(); + }, companyArg.court); // Pass companyArg.court to the browser context await this.clickFindButton(page); const businessRecords = await this.waitForResults(page); console.log(businessRecords); + + // Define the response listener + const responseListener = async ( + response: plugins.smartbrowser.smartpuppeteer.puppeteer.HTTPResponse + ) => { + // Ignore preflight (OPTIONS) requests + if (response.request().method() === 'OPTIONS') { + console.log(`Ignoring preflight request: ${response.url()}`); + return; + } + + // Check for downloads (Content-Disposition header) + const contentDisposition = response.headers()['content-disposition']; + + if (contentDisposition && contentDisposition.includes('attachment')) { + console.log(`Download detected: ${response.url()}`); + try { + const buffer = await response.buffer(); + console.log(`Downloaded file size: ${buffer.length} bytes`); + } catch (error) { + console.error('Error downloading file:', error); + } + } + }; + page.on('response', responseListener); + + // Click the element + await page.evaluate(() => { + // Locate the table body + const tableBody = document.querySelector( + '#ergebnissForm\\:selectedSuchErgebnisFormTable_data' + ); + if (!tableBody) { + throw new Error('Table body not found'); + } + + // Locate the first row + const firstRow = tableBody.querySelector('tr:nth-child(1)'); + if (!firstRow) { + throw new Error('First row not found'); + } + + // Locate the last cell in the first row + const lastCell = firstRow.querySelector('td:last-child'); + if (!lastCell) { + throw new Error('Last cell not found in the first row'); + } + + // Locate the last element in the last cell + const lastLink = lastCell.querySelector('a:last-of-type'); + if (!lastLink) { + throw new Error('Last link not found in the last cell'); + } + + // Simulate a click on the last element + (lastLink as HTMLElement).click(); + }); + + // Optional: Wait for some response or navigation triggered by the click + await page.waitForTimeout(10000); + + page.off('response', responseListener); } }