import type { BusinessRecord } from './classes.businessrecord.js';
import type { OpenData } from './classes.main.opendata.js';
import * as plugins from './plugins.js';
import * as paths from './paths.js';

/** Shorthand for the Puppeteer page type re-exported through the smartbrowser plugin. */
type PuppeteerPage = plugins.smartbrowser.smartpuppeteer.puppeteer.Page;

/**
 * The HandelsRegister exposed as a class.
 *
 * Drives a headless browser against https://www.handelsregister.de/ to search
 * for companies and to download register documents. Every instance downloads
 * into its own unique folder below `paths.downloadDir`; the folder is created
 * in `start()` and removed in `stop()`.
 */
export class HandelsRegister {
  private openDataRef: OpenData;
  private asyncExecutionStack = new plugins.lik.AsyncExecutionStack();
  private uniqueDowloadFolder = plugins.path.join(
    paths.downloadDir,
    plugins.smartunique.uniSimple()
  );

  // Puppeteer wrapper instance
  public smartbrowserInstance = new plugins.smartbrowser.SmartBrowser();

  constructor(openDataRef: OpenData) {
    this.openDataRef = openDataRef;
  }

  /**
   * Creates the instance's download folder and starts the browser.
   */
  public async start() {
    await plugins.smartfile.fs.ensureDir(this.uniqueDowloadFolder);
    await this.smartbrowserInstance.start();
  }

  /**
   * Removes the instance's download folder and stops the browser.
   * The browser is stopped even when removing the folder fails.
   */
  public async stop() {
    try {
      await plugins.smartfile.fs.remove(this.uniqueDowloadFolder);
    } finally {
      await this.smartbrowserInstance.stop();
    }
  }

  /**
   * Creates a new page, configures it to allow file downloads into this
   * instance's download folder, and opens the Handelsregister start page.
   *
   * @returns the prepared page; the caller is responsible for closing it
   * @throws whatever Puppeteer throws during setup; the page is closed first
   */
  public getNewPage = async () => {
    const page = await this.smartbrowserInstance.headlessBrowser.newPage();
    try {
      // 1) Create a DevTools session for this page
      const cdpSession = await page.target().createCDPSession();

      // 2) Allow file downloads and set the download path
      await cdpSession.send('Page.setDownloadBehavior', {
        behavior: 'allow',
        downloadPath: this.uniqueDowloadFolder,
      });

      // 3) Set the viewport and open the start page
      await page.setViewport({ width: 1920, height: 1080 });
      await page.goto('https://www.handelsregister.de/');
      return page;
    } catch (error) {
      // Do not leak a half-initialized tab when setup fails.
      await page.close().catch(() => undefined);
      throw error;
    }
  };

  /**
   * Clicks the menu entry whose visible text equals `pageNameArg`.
   * Best effort: failures (including a missing menu entry) are logged, not thrown.
   */
  private navigateToPage = async (pageArg: PuppeteerPage, pageNameArg: string) => {
    try {
      const clicked = await pageArg.evaluate((pageNameArg2) => {
        const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
        const targetElement = elements.find((el) => el.textContent?.trim() === pageNameArg2);
        if (!targetElement) {
          return false;
        }
        (targetElement as HTMLElement).click();
        return true;
      }, pageNameArg);
      if (!clicked) {
        // Without this check a missing entry would be reported as a success.
        throw new Error(`Menu entry "${pageNameArg}" not found`);
      }
      console.log(`Navigated to the ${pageNameArg} page successfully.`);
    } catch (error) {
      console.error(`Failed to navigate to the ${pageNameArg} page:`, error);
    }
  };

  /**
   * Waits up to 30 seconds for the result table and parses its rows.
   *
   * @returns one record per table row, built from the name, city, status and
   *          registration-court cells
   * @throws the wait error if the table does not appear; a screenshot is
   *         written to `<downloadDir>/error.png` first (best effort)
   */
  private waitForResults = async (pageArg: PuppeteerPage) => {
    try {
      await pageArg.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
        timeout: 30000,
      });
    } catch (err) {
      try {
        await pageArg.screenshot({ path: `${paths.downloadDir}/error.png` });
      } catch (screenshotError) {
        // A failing screenshot must not mask the original wait error.
        console.error('Failed to capture the error screenshot:', screenshotError);
      }
      throw err;
    }

    const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate(() => {
      const rows = Array.from(
        document.querySelectorAll('#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr')
      );
      return rows.map((row): BusinessRecord['data'] => {
        const nameElement = row.querySelector('td.ui-panelgrid-cell span.marginLeft20');
        const cityElement = row.querySelector('td.ui-panelgrid-cell.sitzSuchErgebnisse span');
        const statusElement = row.querySelector('td.ui-panelgrid-cell span.verticalText');
        const registrationCourtElement = row.querySelector(
          'td.ui-panelgrid-cell.fontTableNameSize'
        );
        return {
          name: nameElement?.textContent?.trim(),
          city: cityElement?.textContent?.trim(),
          registrationId: registrationCourtElement?.textContent?.trim(),
          businessType: statusElement?.textContent?.trim(),
        };
      });
    });

    return businessRecords;
  };

  /**
   * Sets the page size to 100 results and clicks the button labelled "Find".
   * Best effort: failures (including a missing button) are logged, not thrown.
   */
  private clickFindButton = async (pageArg: PuppeteerPage) => {
    try {
      // Wait for button labels to appear
      await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });

      // Adjust to 100 results per page
      await pageArg.select('#form\\:ergebnisseProSeite_input', '100');

      // Locate and click the button using its text
      const clicked = await pageArg.evaluate(() => {
        const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
        const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find');
        if (!targetButton) {
          return false;
        }
        const parentButton = targetButton.closest('button') || targetButton;
        (parentButton as HTMLElement).click();
        return true;
      });
      if (!clicked) {
        // Without this check a missing button would be reported as a success.
        throw new Error('No button labelled "Find" on the page');
      }
      console.log('Find button clicked successfully!');
    } catch (error) {
      console.error('Failed to find or click the "Find" button:', error);
    }
  };

  /**
   * Opens a dropdown via its label and clicks the entry whose trimmed text
   * equals `optionTextArg`. A missing entry is logged and otherwise ignored.
   */
  private async selectDropdownOption(
    pageArg: PuppeteerPage,
    labelSelectorArg: string,
    itemsSelectorArg: string,
    optionTextArg: string | undefined
  ) {
    await pageArg.waitForSelector(labelSelectorArg);
    await pageArg.click(labelSelectorArg);
    await pageArg.waitForSelector(itemsSelectorArg);
    const selected = await pageArg.evaluate(
      (itemsSelector, optionText) => {
        const options = Array.from(document.querySelectorAll(`${itemsSelector} li`));
        const targetOption = options.find((option) => option.textContent?.trim() === optionText);
        if (!targetOption) {
          return false;
        }
        (targetOption as HTMLElement).click();
        return true;
      },
      itemsSelectorArg,
      optionTextArg
    );
    if (!selected) {
      console.error(`Option "${optionTextArg}" not found in dropdown ${itemsSelectorArg}`);
    }
  }

  /**
   * Clicks a download link in the last cell of the first result row and
   * returns the first file found in the download folder afterwards.
   *
   * @param typeArg 'AD' clicks the first link of the cell, 'SI' the last one
   * @returns the first downloaded file; undefined at runtime if nothing
   *          arrived within the fixed wait
   */
  private async downloadFile(pageArg: PuppeteerPage, typeArg: 'SI' | 'AD') {
    // Fixed wait for the download to finish; there is no completion signal here.
    const downloadWaitMillis = 10000;

    // Trigger the file download by clicking on the relevant link
    await pageArg.evaluate((typeArg2) => {
      const tableBody = document.querySelector(
        '#ergebnissForm\\:selectedSuchErgebnisFormTable_data'
      );
      if (!tableBody) {
        throw new Error('Table body not found');
      }

      const firstRow = tableBody.querySelector('tr:nth-child(1)');
      if (!firstRow) {
        throw new Error('First row not found');
      }

      const lastCell = firstRow.querySelector('td:last-child');
      if (!lastCell) {
        throw new Error('Last cell not found in the first row');
      }

      let linkSelector: string;
      switch (typeArg2) {
        case 'AD':
          linkSelector = 'a:first-of-type';
          break;
        case 'SI':
          linkSelector = 'a:last-of-type';
          break;
        default:
          throw new Error('Invalid file type');
      }

      // Check the link that is actually requested, not only the SI link.
      const link = lastCell.querySelector<HTMLElement>(linkSelector);
      if (!link) {
        throw new Error(`${typeArg2} link not found in the last cell`);
      }
      link.click();
    }, typeArg);

    // Plain timer instead of page.waitForTimeout, which newer Puppeteer
    // releases no longer provide.
    await new Promise<void>((resolve) => setTimeout(resolve, downloadWaitMillis));

    const files = await plugins.smartfile.fs.fileTreeToObject(this.uniqueDowloadFolder, '**/*');
    // Empty the folder so the next download is not confused with this one.
    await plugins.smartfile.fs.ensureEmptyDir(this.uniqueDowloadFolder);
    return files[0];
  }

  /**
   * Helper method to parse the German registration string,
   * e.g. "District court Berlin (Charlottenburg) HRB 123456".
   *
   * @returns court, register type and number, or undefined if the input
   *          does not match the expected pattern
   */
  private async parseGermanRegistration(
    input: string
  ): Promise<BusinessRecord['data']['germanParsedRegistration']> {
    const regex =
      /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
    const match = input.match(regex);
    if (!match) {
      return undefined;
    }
    return {
      court: match[1],
      type: match[2] as 'HRA' | 'HRB', // Adjust if needed
      number: match[3],
    };
  }

  /**
   * Search for a company by name and return basic info.
   *
   * Runs inside an exclusive execution slot of the instance's
   * AsyncExecutionStack (second argument 60000 — presumably a timeout in
   * milliseconds; confirm against the lik documentation). The page is closed
   * on success and on failure.
   *
   * @param companyNameArg text entered into the search field
   * @returns the parsed result rows, each enriched with
   *          `germanParsedRegistration` when a registration id was found
   */
  public async searchCompany(companyNameArg: string) {
    return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
      const page = await this.getNewPage();
      try {
        await this.navigateToPage(page, 'Normal search');

        try {
          // Wait for the textarea to appear
          await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

          // Enter text into the textarea
          await page.evaluate((text) => {
            const textarea = document.querySelector<HTMLTextAreaElement>(
              '#form\\:schlagwoerter'
            );
            if (textarea) {
              textarea.value = text;
              // Trigger the change event manually so the form notices the value
              textarea.dispatchEvent(new Event('change', { bubbles: true }));
            }
          }, companyNameArg);

          console.log('Text entered successfully!');
        } catch (error) {
          console.error('Failed to find or enter text into the textarea:', error);
        }

        try {
          // Wait for the radio button's label to appear
          await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', {
            timeout: 5000,
          });

          // Click the label to select the radio button
          await page.evaluate(() => {
            const label = document.querySelector<HTMLLabelElement>(
              'label[for="form:schlagwortOptionen:0"]'
            );
            if (label) {
              label.click();
            }
          });

          console.log('Radio button clicked successfully!');
        } catch (error) {
          console.error('Failed to find or click the radio button:', error);
        }

        await this.clickFindButton(page);

        const businessRecords = await this.waitForResults(page);

        // Parse out the registration info
        for (const record of businessRecords) {
          if (record.registrationId) {
            record.germanParsedRegistration = await this.parseGermanRegistration(
              record.registrationId
            );
          }
        }

        return businessRecords;
      } finally {
        // Close the tab even when a step above throws.
        await page.close();
      }
    }, 60000);
  }

  /**
   * Search for a specific company (known register type/number/court),
   * then download the 'SI' and 'AD' documents of the first result row.
   *
   * Runs inside an exclusive execution slot, like `searchCompany`. The page is
   * closed on success and on failure.
   *
   * @param companyArg parsed registration with `type`, `number` and `court`
   * @returns the parsed result rows and the downloaded files (SI first, then AD)
   * @throws Error if `companyArg` is missing, or if a step such as waiting for
   *         results or locating a download link fails
   */
  public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
    if (!companyArg) {
      // Fail before a page is opened instead of crashing mid-way on a property access.
      throw new Error('getSpecificCompany requires a parsed registration');
    }

    return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
      const page = await this.getNewPage();
      try {
        await this.navigateToPage(page, 'Normal search');

        await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

        // 1) Type of Register (e.g. HRB, HRA, etc.)
        await this.selectDropdownOption(
          page,
          '#form\\:registerArt_label',
          '#form\\:registerArt_items',
          companyArg.type
        );

        // 2) Register number
        await page.waitForSelector('#form\\:registerNummer');
        await page.type('#form\\:registerNummer', companyArg.number);

        // 3) Register court
        await this.selectDropdownOption(
          page,
          '#form\\:registergericht_label',
          '#form\\:registergericht_items',
          companyArg.court
        );

        // Click 'Find'
        await this.clickFindButton(page);

        // Grab the results, also logged for diagnostics
        const businessRecords = await this.waitForResults(page);
        console.log(businessRecords);

        // Download sequentially: downloadFile empties the shared folder after each file.
        const files: plugins.smartfile.SmartFile[] = [];
        files.push(await this.downloadFile(page, 'SI'));
        files.push(await this.downloadFile(page, 'AD'));

        return {
          businessRecords,
          files,
        };
      } finally {
        // Close the tab even when a step above throws.
        await page.close();
      }
    }, 60000);
  }
}