feat(core): Enhanced data handling capabilities and improved company search functionalities.
This commit is contained in:
@ -1,3 +1,4 @@
|
||||
import type { BusinessRecord } from './classes.businessrecord.js';
|
||||
import type { OpenData } from './classes.main.opendata.js';
|
||||
import * as plugins from './plugins.js';
|
||||
|
||||
@ -21,21 +22,117 @@ export class HandelsRegister {
|
||||
await this.smartbrowserInstance.stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* Search for a company by name
|
||||
*/
|
||||
public async getDataForCompany(companyNameArg: string) {
|
||||
// page stuff
|
||||
public getNewPage = async () => {
|
||||
const page = await this.smartbrowserInstance.headlessBrowser.newPage();
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
await page.goto('https://www.handelsregister.de/');
|
||||
await page.evaluate(() => {
|
||||
const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
|
||||
const targetElement = elements.find((el) => el.textContent?.trim() === 'Normal search');
|
||||
if (targetElement) {
|
||||
(targetElement as HTMLElement).click();
|
||||
}
|
||||
return page;
|
||||
};
|
||||
|
||||
/**
 * Clicks the first `.ui-menuitem-text > span` element whose trimmed text equals `pageNameArg`.
 *
 * This is best-effort: errors are logged and never rethrown. The success message is only
 * logged when an element was actually found and clicked; otherwise a warning is logged.
 *
 * @param pageArg - the Puppeteer page to operate on
 * @param pageNameArg - exact (trimmed) label of the menu entry, e.g. 'Normal search'
 */
private navigateToPage = async (
  pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
  pageNameArg: string
) => {
  try {
    // The callback is executed inside the page, so the label has to be handed over as an argument.
    const wasClicked = await pageArg.evaluate((pageNameArg2) => {
      const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
      const targetElement = elements.find((el) => el.textContent?.trim() === pageNameArg2);
      if (!targetElement) {
        return false;
      }
      (targetElement as HTMLElement).click();
      return true;
    }, pageNameArg);

    if (wasClicked) {
      console.log(`Navigated to the ${pageNameArg} page successfully.`);
    } else {
      // Previously this case was reported as a success although nothing had been clicked.
      console.warn(`Could not navigate to the ${pageNameArg} page: no matching menu entry found.`);
    }
  } catch (error) {
    console.error(`Failed to navigate to the ${pageNameArg} page:`, error);
  }
};
|
||||
|
||||
/**
 * Waits (up to 30 seconds) for the result table body to be present and converts each of its
 * direct `tr` children into a record with `name`, `city`, `registrationId` and `businessType`.
 *
 * A field is `undefined` when its cell is not found in the row.
 *
 * @param pageArg - the Puppeteer page that is showing (or about to show) the results
 * @returns one record per table row, in document order
 */
private waitForResults = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
  await pageArg.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
    timeout: 30000,
  });

  const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate(() => {
    // Runs inside the page: helpers must be declared here, nothing from the outer scope is available.
    const trimmedText = (rowArg: Element, selectorArg: string) =>
      rowArg.querySelector(selectorArg)?.textContent?.trim();

    const tableRows = Array.from(
      document.querySelectorAll('#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr')
    );

    return tableRows.map((tableRow): BusinessRecord['data'] => {
      const name = trimmedText(tableRow, 'td.ui-panelgrid-cell span.marginLeft20');
      const city = trimmedText(tableRow, 'td.ui-panelgrid-cell.sitzSuchErgebnisse span');
      const status = trimmedText(tableRow, 'td.ui-panelgrid-cell span.verticalText');
      const registrationId = trimmedText(tableRow, 'td.ui-panelgrid-cell.fontTableNameSize');

      return {
        name,
        city,
        registrationId,
        businessType: status,
      };
    });
  });

  return businessRecords;
};
|
||||
|
||||
/**
 * Sets the results-per-page select to 100 and clicks the button labelled "Find".
 *
 * This is best-effort: errors (including the 5 second selector timeout) are logged and never
 * rethrown. The success message is only logged when a "Find" button was actually clicked;
 * otherwise a warning is logged.
 *
 * @param pageArg - the Puppeteer page that shows the search form
 */
private clickFindButton = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
  try {
    // Wait until at least one button label is rendered
    await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });

    // adjust to 100 results per page
    await pageArg.select('#form\\:ergebnisseProSeite_input', '100');

    // Locate the button via its label text and report back whether a click happened
    const wasClicked = await pageArg.evaluate(() => {
      const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
      const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find');
      if (!targetButton) {
        return false;
      }
      // Prefer the enclosing <button>; fall back to the label span itself
      const parentButton = targetButton.closest('button') || targetButton;
      (parentButton as HTMLElement).click();
      return true;
    });

    if (wasClicked) {
      console.log('Find button clicked successfully!');
    } else {
      // Previously this case was reported as a success although nothing had been clicked.
      console.warn('No button labelled "Find" was found; nothing was clicked.');
    }
  } catch (error) {
    console.error('Failed to find or click the "Find" button:', error);
  }
};
|
||||
|
||||
// parsing stuff
|
||||
/**
 * Extracts court, register type and register number from a string of the form
 * `District court <court name> <HRA|HRB|GnR|VR|PR|GsR> <digits>`.
 *
 * @param input - the scraped registration text; at runtime this can be undefined or empty
 *                when the table cell was missing, which is tolerated
 * @returns the parsed parts, or `undefined` when the input is missing or does not match
 */
private async parseGermanRegistration(
  input: string
): Promise<BusinessRecord['data']['germanParsedRegistration']> {
  // Scraped values come from optional chaining and may be undefined despite the declared type;
  // calling .match on them would throw a TypeError.
  if (typeof input !== 'string' || input.length === 0) {
    return undefined;
  }

  const regex = /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
  const match = input.match(regex);

  if (!match) {
    return undefined;
  }

  return {
    court: match[1], // the court name
    // NOTE(review): the regex also accepts GnR, VR, PR and GsR, but the cast narrows to
    // 'HRA' | 'HRB' — confirm whether the declared type should be widened.
    type: match[2] as 'HRA' | 'HRB',
    number: match[3], // the register number (digits only)
  };
}
|
||||
|
||||
/**
|
||||
* Search for a company by name
|
||||
*/
|
||||
public async searchCompany(companyNameArg: string) {
|
||||
const page = await this.getNewPage();
|
||||
await this.navigateToPage(page, 'Normal search');
|
||||
|
||||
try {
|
||||
// Wait for the textarea to appear
|
||||
await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });
|
||||
@ -76,63 +173,59 @@ export class HandelsRegister {
|
||||
console.error('Failed to find or click the radio button:', error);
|
||||
}
|
||||
|
||||
try {
|
||||
// Wait for the button with the text "Find" to appear
|
||||
await page.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });
|
||||
await this.clickFindButton(page);
|
||||
|
||||
// Locate and click the button using its text
|
||||
await page.evaluate(() => {
|
||||
const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
|
||||
const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find');
|
||||
if (targetButton) {
|
||||
const parentButton = targetButton.closest('button') || targetButton;
|
||||
(parentButton as HTMLElement).click();
|
||||
}
|
||||
});
|
||||
const businessRecords = await this.waitForResults(page);
|
||||
|
||||
console.log('Find button clicked successfully!');
|
||||
} catch (error) {
|
||||
console.error('Failed to find or click the "Find" button:', error);
|
||||
for (const record of businessRecords) {
|
||||
record.germanParsedRegistration = await this.parseGermanRegistration(record.registrationId);
|
||||
}
|
||||
|
||||
await page.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
|
||||
timeout: 10000,
|
||||
});
|
||||
|
||||
const businessRecords = await page.evaluate(() => {
|
||||
const rows = document.querySelectorAll(
|
||||
'#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr'
|
||||
);
|
||||
const records = [];
|
||||
|
||||
rows.forEach((row) => {
|
||||
const nameElement = row.querySelector('td.ui-panelgrid-cell span.marginLeft20');
|
||||
const cityElement = row.querySelector('td.ui-panelgrid-cell.sitzSuchErgebnisse span');
|
||||
const statusElement = row.querySelector('td.ui-panelgrid-cell span.verticalText');
|
||||
const registrationCourtElement = row.querySelector(
|
||||
'td.ui-panelgrid-cell.fontTableNameSize'
|
||||
);
|
||||
|
||||
const name = nameElement?.textContent?.trim();
|
||||
const city = cityElement?.textContent?.trim();
|
||||
const status = statusElement?.textContent?.trim();
|
||||
const registrationCourt = registrationCourtElement?.textContent?.trim();
|
||||
|
||||
// Push parsed data into records array
|
||||
records.push({
|
||||
name,
|
||||
city,
|
||||
registrationCourt,
|
||||
businessType: status,
|
||||
});
|
||||
});
|
||||
|
||||
return records;
|
||||
});
|
||||
|
||||
await page.close();
|
||||
|
||||
// Finally, we return an object, which triggers a JSON file download
|
||||
return businessRecords;
|
||||
}
|
||||
|
||||
/**
 * Looks up a company through the 'Normal search' form by register type, register number
 * and register court, and returns the rows of the result table.
 *
 * The page opened for the lookup is always closed again, also when a step fails.
 *
 * @param companyArg - parsed registration providing `type`, `number` and `court`
 * @returns the records read from the result table
 * @throws TypeError when `companyArg` is missing
 * @throws whatever the underlying page operations throw (e.g. selector timeouts)
 */
public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
  // Fail before a page is opened; otherwise the property access further down would throw
  // only after the browser work had already started.
  if (!companyArg) {
    throw new TypeError('getSpecificCompany requires a parsed registration (court, type, number).');
  }

  const page = await this.getNewPage();
  try {
    await this.navigateToPage(page, 'Normal search');
    await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

    // 1) Type of Register:
    // Open the dropdown to reveal options
    await page.waitForSelector('#form\\:registerArt_label');
    await page.click('#form\\:registerArt_label');

    // Wait for the options and select the one matching companyArg.type
    await page.waitForSelector('#form\\:registerArt_items');
    await page.evaluate((type) => {
      const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li'));
      const targetOption = options.find((option) => option.textContent?.trim() === type);
      (targetOption as any)?.click();
    }, companyArg.type); // hand the value over to the browser context

    // 2) Register number:
    await page.waitForSelector('#form\\:registerNummer');
    await page.type('#form\\:registerNummer', companyArg.number);

    // 3) Register court:
    // Open the dropdown for the register court
    await page.waitForSelector('#form\\:registergericht_label');
    await page.click('#form\\:registergericht_label');

    // Wait for the options and select the one matching companyArg.court
    await page.waitForSelector('#form\\:registergericht_items');
    await page.evaluate((court) => {
      const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li'));
      const targetOption = options.find((option) => option.textContent?.trim() === court);
      (targetOption as any)?.click();
    }, companyArg.court); // hand the value over to the browser context

    await this.clickFindButton(page);

    const businessRecords = await this.waitForResults(page);
    console.log(businessRecords);
    return businessRecords;
  } finally {
    // Release the page in every case; a failing close must not hide the original error.
    await page.close().catch((closeError) => {
      console.error('Failed to close the page:', closeError);
    });
  }
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user