2025-01-03 01:36:26 +01:00
|
|
|
import type { BusinessRecord } from './classes.businessrecord.js';
|
2025-01-02 01:26:34 +01:00
|
|
|
import type { OpenData } from './classes.main.opendata.js';
|
|
|
|
import * as plugins from './plugins.js';
|
2025-01-04 02:27:53 +01:00
|
|
|
import * as paths from './paths.js';
|
2025-01-02 01:26:34 +01:00
|
|
|
|
|
|
|
/**
 * The HandelsRegister (German commercial register) exposed as a class.
 *
 * Drives the search form at https://www.handelsregister.de/ through a
 * SmartBrowser (Puppeteer wrapper) instance. Every public search runs inside
 * an exclusive slot of `asyncExecutionStack` and uses a per-instance download
 * folder that is created in `start()` and removed in `stop()`.
 */
export class HandelsRegister {
  // Reference to the owning OpenData instance; stored but not read inside this class.
  private openDataRef: OpenData;
  // Public searches are wrapped in getExclusiveExecutionSlot() of this stack.
  // NOTE(review): presumably this serializes searches so they do not share the download folder concurrently — confirm.
  private asyncExecutionStack = new plugins.lik.AsyncExecutionStack();
  // Per-instance folder below paths.downloadDir that receives browser downloads.
  private uniqueDowloadFolder = plugins.path.join(paths.downloadDir, plugins.smartunique.uniSimple());

  // Puppeteer wrapper instance
  public smartbrowserInstance = new plugins.smartbrowser.SmartBrowser();

  constructor(openDataRef: OpenData) {
    this.openDataRef = openDataRef;
  }

  /**
   * Ensures the download folder exists, then starts the browser.
   */
  public async start() {
    await plugins.smartfile.fs.ensureDir(this.uniqueDowloadFolder);
    await this.smartbrowserInstance.start();
  }

  /**
   * Removes the download folder, then stops the browser.
   */
  public async stop() {
    await plugins.smartfile.fs.remove(this.uniqueDowloadFolder);
    await this.smartbrowserInstance.stop();
  }

  /**
   * Closes a page without throwing, so that cleanup never masks the error
   * that triggered it. Close failures are logged instead.
   */
  private async closePageSafely(pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) {
    try {
      await pageArg.close();
    } catch (error) {
      console.error('Failed to close the page:', error);
    }
  }

  /**
   * Creates a new page and configures it to allow file downloads
   * to this instance's download folder, then opens the register start page.
   * If any setup step fails the page is closed again before the error is rethrown.
   */
  public getNewPage = async () => {
    const page = await this.smartbrowserInstance.headlessBrowser.newPage();
    try {
      // 1) Create a DevTools session for this page
      const cdpSession = await page.target().createCDPSession();

      // 2) Allow file downloads and route them into the per-instance folder
      await cdpSession.send('Page.setDownloadBehavior', {
        behavior: 'allow',
        downloadPath: this.uniqueDowloadFolder,
      });

      // 3) Set the viewport and open the start page
      await page.setViewport({ width: 1920, height: 1080 });
      await page.goto('https://www.handelsregister.de/');
      return page;
    } catch (error) {
      // do not leak a half-configured page
      await this.closePageSafely(page);
      throw error;
    }
  };

  /**
   * Clicks the menu entry whose trimmed text equals `pageNameArg`.
   * Best effort: failures are logged, never thrown.
   */
  private navigateToPage = async (
    pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
    pageNameArg: string
  ) => {
    try {
      const clicked = await pageArg.evaluate((pageNameArg2) => {
        const elements = Array.from(document.querySelectorAll('.ui-menuitem-text > span'));
        const targetElement = elements.find((el) => el.textContent?.trim() === pageNameArg2);
        if (!targetElement) {
          return false;
        }
        (targetElement as HTMLElement).click();
        return true;
      }, pageNameArg);
      if (clicked) {
        console.log(`Navigated to the ${pageNameArg} page successfully.`);
      } else {
        // only report success when a menu entry was actually clicked
        console.error(`Failed to navigate to the ${pageNameArg} page: menu entry not found.`);
      }
    } catch (error) {
      console.error(`Failed to navigate to the ${pageNameArg} page:`, error);
    }
  };

  /**
   * Waits up to 30 seconds for the result table and converts each row into
   * a business record (name, city, registrationId, businessType).
   * On timeout a screenshot is written to `error.png` in paths.downloadDir
   * and the original error is rethrown.
   */
  private waitForResults = async (pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page) => {
    try {
      await pageArg.waitForSelector('#ergebnissForm\\:selectedSuchErgebnisFormTable_data', {
        timeout: 30000,
      });
    } catch (err) {
      try {
        await pageArg.screenshot({ path: plugins.path.join(paths.downloadDir, 'error.png') });
      } catch (screenshotError) {
        // a failing screenshot must not hide the actual timeout error
        console.error('Failed to capture the error screenshot:', screenshotError);
      }
      throw err;
    }

    // Runs inside the browser context, so only DOM data can be used here.
    const businessRecords: BusinessRecord['data'][] = await pageArg.evaluate(() => {
      const rows = document.querySelectorAll(
        '#ergebnissForm\\:selectedSuchErgebnisFormTable_data > tr'
      );
      const records: BusinessRecord['data'][] = [];

      rows.forEach((row) => {
        const nameElement = row.querySelector('td.ui-panelgrid-cell span.marginLeft20');
        const cityElement = row.querySelector('td.ui-panelgrid-cell.sitzSuchErgebnisse span');
        const statusElement = row.querySelector('td.ui-panelgrid-cell span.verticalText');
        const registrationCourtElement = row.querySelector(
          'td.ui-panelgrid-cell.fontTableNameSize'
        );

        const name = nameElement?.textContent?.trim();
        const city = cityElement?.textContent?.trim();
        const status = statusElement?.textContent?.trim();
        const registrationId = registrationCourtElement?.textContent?.trim();

        // Push parsed data into records array; missing cells stay undefined
        records.push({
          name,
          city,
          registrationId,
          businessType: status,
        });
      });

      return records;
    });
    return businessRecords;
  };

  /**
   * Sets the "results per page" select to `resultsLimitArg` and clicks the
   * button labelled "Find". Best effort: failures are logged, never thrown.
   */
  private clickFindButton = async (
    pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
    resultsLimitArg: number = 100
  ) => {
    try {
      // Wait for the button text spans to appear
      await pageArg.waitForSelector('span.ui-button-text.ui-c', { timeout: 5000 });

      // adjust the number of results per page
      await pageArg.select('#form\\:ergebnisseProSeite_input', `${resultsLimitArg}`);

      // Locate and click the button using its text
      const clicked = await pageArg.evaluate(() => {
        const buttons = Array.from(document.querySelectorAll('span.ui-button-text.ui-c'));
        const targetButton = buttons.find((button) => button.textContent?.trim() === 'Find');
        if (!targetButton) {
          return false;
        }
        // the text span sits inside the actual <button>; fall back to the span itself
        const parentButton = targetButton.closest('button') || targetButton;
        (parentButton as HTMLElement).click();
        return true;
      });

      if (clicked) {
        console.log('Find button clicked successfully!');
      } else {
        console.error('Failed to find or click the "Find" button: button not found.');
      }
    } catch (error) {
      console.error('Failed to find or click the "Find" button:', error);
    }
  };

  /**
   * Clicks a download link in the last cell of the first result row and
   * returns the downloaded file, renamed to `ad.pdf` or `si.xml`.
   *
   * @param pageArg page that currently shows the result table
   * @param typeArg 'AD' clicks the first link of the cell, 'SI' the last one
   * @throws if the table, row, cell or link is missing, or if no file shows
   *         up in the download folder
   */
  private async downloadFile(
    pageArg: plugins.smartbrowser.smartpuppeteer.puppeteer.Page,
    typeArg: 'SI' | 'AD'
  ) {
    // Trigger the file download by clicking on the relevant link
    await pageArg.evaluate((typeArg2) => {
      // Locate the table body
      const tableBody = document.querySelector(
        '#ergebnissForm\\:selectedSuchErgebnisFormTable_data'
      );
      if (!tableBody) {
        throw new Error('Table body not found');
      }

      // Locate the first row
      const firstRow = tableBody.querySelector('tr:nth-child(1)');
      if (!firstRow) {
        throw new Error('First row not found');
      }

      // Locate the last cell in the first row
      const lastCell = firstRow.querySelector('td:last-child');
      if (!lastCell) {
        throw new Error('Last cell not found in the first row');
      }

      // Pick the download link that belongs to the requested type
      let downloadLink: Element | null;
      switch (typeArg2) {
        case 'AD':
          downloadLink = lastCell.querySelector('a:first-of-type');
          break;
        case 'SI':
          downloadLink = lastCell.querySelector('a:last-of-type');
          break;
        default:
          throw new Error('Invalid file type');
      }
      if (!downloadLink) {
        throw new Error(`${typeArg2} link not found in the last cell`);
      }

      // Simulate a click on the selected <a> element
      (downloadLink as HTMLElement).click();
    }, typeArg);

    await plugins.smartfile.fs.waitForFileToBeReady(this.uniqueDowloadFolder);

    const files = await plugins.smartfile.fs.fileTreeToObject(this.uniqueDowloadFolder, '**/*');
    const file = files[0];

    // lets clear the folder for the next download
    await plugins.smartfile.fs.ensureEmptyDir(this.uniqueDowloadFolder);

    if (!file) {
      throw new Error(`No downloaded ${typeArg} file found in ${this.uniqueDowloadFolder}`);
    }

    // NOTE(review): the folder is already emptied here, so rename() presumably
    // only changes the in-memory file object — confirm against smartfile.
    switch (typeArg) {
      case 'AD':
        await file.rename(`ad.pdf`);
        break;
      case 'SI':
        await file.rename(`si.xml`);
        break;
    }
    return file;
  }

  /**
   * Helper method to parse the German registration string,
   * e.g. "District court Berlin (Charlottenburg) HRB 123456".
   *
   * @returns the court, register type and number, or undefined when the
   *          input does not match the expected format
   */
  private async parseGermanRegistration(
    input: string
  ): Promise<BusinessRecord['data']['germanParsedRegistration']> {
    const regex =
      /District court (\p{L}[\p{L}\s-]*?(?:\s*\([\p{L}\s-]+\))?)\s+(HRA|HRB|GnR|VR|PR|GsR)\s+(\d+)/u;
    const match = input.match(regex);

    if (match) {
      return {
        court: match[1],
        // the regex also accepts GnR, VR, PR and GsR; the cast is narrower than that
        type: match[2] as 'HRA' | 'HRB', // Adjust if needed
        number: match[3],
      };
    }
    return undefined;
  }

  /**
   * Search for a company by name and return basic info.
   *
   * @param companyNameArg text entered into the keyword field
   * @param resultsLimitArg value selected for "results per page"
   * @returns the parsed result rows, each with `germanParsedRegistration`
   *          filled in where the registration text could be parsed
   */
  public async searchCompany(companyNameArg: string, resultsLimitArg: number = 100) {
    return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
      const page = await this.getNewPage();
      try {
        await this.navigateToPage(page, 'Normal search');

        try {
          // Wait for the textarea to appear
          await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

          // Enter text into the textarea
          const inputText = companyNameArg;
          await page.evaluate((text) => {
            const textarea = document.querySelector<HTMLTextAreaElement>('#form\\:schlagwoerter');
            if (textarea) {
              textarea.value = text; // Set the value
              // Trigger the change event manually if required
              const event = new Event('change', { bubbles: true });
              textarea.dispatchEvent(event);
            }
          }, inputText);

          console.log('Text entered successfully!');
        } catch (error) {
          console.error('Failed to find or enter text into the textarea:', error);
        }

        try {
          // Wait for the radio button's label to appear
          await page.waitForSelector('label[for="form:schlagwortOptionen:0"]', { timeout: 5000 });

          // Click the label to select the radio button
          await page.evaluate(() => {
            const label = document.querySelector<HTMLLabelElement>(
              'label[for="form:schlagwortOptionen:0"]'
            );
            if (label) {
              label.click();
            }
          });

          console.log('Radio button clicked successfully!');
        } catch (error) {
          console.error('Failed to find or click the radio button:', error);
        }

        await this.clickFindButton(page, resultsLimitArg);

        const businessRecords = await this.waitForResults(page);

        // Parse out the registration info
        for (const record of businessRecords) {
          if (record.registrationId) {
            record.germanParsedRegistration = await this.parseGermanRegistration(
              record.registrationId
            );
          }
        }

        return businessRecords;
      } finally {
        // always release the page, also when the search failed
        await this.closePageSafely(page);
      }
    }, 60000);
  }

  /**
   * Search for a specific company (known register type/number/court),
   * then download its SI and AD files from the first result row.
   *
   * @param companyArg parsed registration providing `type`, `number` and `court`
   * @returns the result rows and the downloaded files (SI first, then AD)
   * @throws if `companyArg` is missing, or if the search or a download fails
   */
  public async getSpecificCompany(companyArg: BusinessRecord['data']['germanParsedRegistration']) {
    if (!companyArg) {
      // fail before a page is opened instead of crashing halfway through the form
      throw new Error('getSpecificCompany requires a parsed registration (type, number, court)');
    }
    return this.asyncExecutionStack.getExclusiveExecutionSlot(async () => {
      const page = await this.getNewPage();
      try {
        await this.navigateToPage(page, 'Normal search');
        await page.waitForSelector('#form\\:schlagwoerter', { timeout: 5000 });

        // 1) Type of Register (e.g. HRB, HRA, etc.)
        await page.waitForSelector('#form\\:registerArt_label');
        await page.click('#form\\:registerArt_label');
        await page.waitForSelector('#form\\:registerArt_items');
        await page.evaluate((type) => {
          const options = Array.from(document.querySelectorAll('#form\\:registerArt_items li'));
          const targetOption = options.find((option) => option.textContent?.trim() === type);
          // no matching option: nothing is clicked and the selection stays unchanged
          (targetOption as HTMLElement)?.click();
        }, companyArg.type);

        // 2) Register number
        await page.waitForSelector('#form\\:registerNummer');
        await page.type('#form\\:registerNummer', companyArg.number);

        // 3) Register court
        await page.waitForSelector('#form\\:registergericht_label');
        await page.click('#form\\:registergericht_label');
        await page.waitForSelector('#form\\:registergericht_items');
        await page.evaluate((court) => {
          const options = Array.from(document.querySelectorAll('#form\\:registergericht_items li'));
          const targetOption = options.find((option) => option.textContent?.trim() === court);
          // no matching option: nothing is clicked and the selection stays unchanged
          (targetOption as HTMLElement)?.click();
        }, companyArg.court);

        // Click 'Find'
        await this.clickFindButton(page);

        // Grab the results; they are logged and returned alongside the files
        const businessRecords = await this.waitForResults(page);
        console.log(businessRecords);

        const files: plugins.smartfile.SmartFile[] = [];

        // download files
        files.push(await this.downloadFile(page, 'SI'));
        files.push(await this.downloadFile(page, 'AD'));

        return {
          businessRecords,
          files,
        };
      } finally {
        // always release the page, also when the search or a download failed
        await this.closePageSafely(page);
      }
    }, 60000);
  }

  /**
   * Get a specific company by full name: searches by name with a result
   * limit of 1 and then fetches the first hit via its parsed registration.
   *
   * @throws if the search returns no record with a parsed registration
   */
  public async getSpecificCompanyByName(companyNameArg: string) {
    const businessRecords = await this.searchCompany(companyNameArg, 1);
    const registration = businessRecords[0]?.germanParsedRegistration;
    if (!registration) {
      throw new Error(`No company with a parsable registration found for "${companyNameArg}"`);
    }
    return this.getSpecificCompany(registration);
  }
}
|