/**
 * Generates the German fee schedule data modules in `ts/germany`.
 *
 * For every entry in `federalSources` the script downloads the `xml.zip`
 * published under https://www.gesetze-im-internet.de/<sourcePath>/, unpacks it
 * into `.nogit/fee-schedules-sources`, extracts norm metadata, rule sections
 * and table rows, and writes one `<fileName>.ts` module. Every entry in
 * `externalSources` gets a placeholder module that only records where the data
 * can be found. Finally an `index.ts` re-exporting all modules is written.
 */
import { spawnSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

const packageRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
const sourceRoot = path.join(packageRoot, '.nogit', 'fee-schedules-sources');
const outDir = path.join(packageRoot, 'ts', 'germany');
// Date part (YYYY-MM-DD) of the current UTC timestamp.
const generatedAt = new Date().toISOString().slice(0, 10);

// rowMode selects how table rows are classified by getRowType():
// 'code' -> rows whose first cell looks like a fee code, 'table' -> rows with
// at least two numeric cells, 'none' -> no rows are extracted.
const federalSources = [
  { scheduleId: 'de-goae', fileName: 'goae', exportName: 'GERMANY_GOAE_FEE_SCHEDULE_DATA', sourcePath: 'go__1982', rowMode: 'code' },
  { scheduleId: 'de-goz', fileName: 'goz', exportName: 'GERMANY_GOZ_FEE_SCHEDULE_DATA', sourcePath: 'goz_1987', rowMode: 'code' },
  { scheduleId: 'de-gop', fileName: 'gop', exportName: 'GERMANY_GOP_FEE_SCHEDULE_DATA', sourcePath: 'gop', rowMode: 'none' },
  { scheduleId: 'de-got', fileName: 'got', exportName: 'GERMANY_GOT_FEE_SCHEDULE_DATA', sourcePath: 'got_2022', rowMode: 'code' },
  { scheduleId: 'de-stbvv', fileName: 'stbvv', exportName: 'GERMANY_STBVV_FEE_SCHEDULE_DATA', sourcePath: 'stbgebv', rowMode: 'table' },
  { scheduleId: 'de-rvg', fileName: 'rvg', exportName: 'GERMANY_RVG_FEE_SCHEDULE_DATA', sourcePath: 'rvg', rowMode: 'code' },
  { scheduleId: 'de-gnotkg', fileName: 'gnotkg', exportName: 'GERMANY_GNOTKG_FEE_SCHEDULE_DATA', sourcePath: 'gnotkg', rowMode: 'code' },
  { scheduleId: 'de-gkg', fileName: 'gkg', exportName: 'GERMANY_GKG_FEE_SCHEDULE_DATA', sourcePath: 'gkg_2004', rowMode: 'code' },
  { scheduleId: 'de-famgkg', fileName: 'famgkg', exportName: 'GERMANY_FAMGKG_FEE_SCHEDULE_DATA', sourcePath: 'famgkg', rowMode: 'code' },
  { scheduleId: 'de-gvkostg', fileName: 'gvkostg', exportName: 'GERMANY_GVKOSTG_FEE_SCHEDULE_DATA', sourcePath: 'gvkostg', rowMode: 'code' },
  { scheduleId: 'de-jveg', fileName: 'jveg', exportName: 'GERMANY_JVEG_FEE_SCHEDULE_DATA', sourcePath: 'jveg', rowMode: 'code' },
  { scheduleId: 'de-insvv', fileName: 'insvv', exportName: 'GERMANY_INSVV_FEE_SCHEDULE_DATA', sourcePath: 'insvv', rowMode: 'none' },
  { scheduleId: 'de-hoai', fileName: 'hoai', exportName: 'GERMANY_HOAI_FEE_SCHEDULE_DATA', sourcePath: 'hoai_2013', rowMode: 'table' },
  { scheduleId: 'de-ampreisv', fileName: 'ampreisv', exportName: 'GERMANY_AMPREISV_FEE_SCHEDULE_DATA', sourcePath: 'ampreisv', rowMode: 'none' },
];

// Sources that are not downloaded by this script; only a reference is recorded.
const externalSources = [
  {
    scheduleId: 'de-ebm',
    fileName: 'ebm',
    exportName: 'GERMANY_EBM_FEE_SCHEDULE_DATA',
    sourceName: 'Kassenärztliche Bundesvereinigung EBM',
    sourceUrl: 'https://www.kbv.de/html/ebm.php',
    note: 'External self-administration source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-bema',
    fileName: 'bema',
    exportName: 'GERMANY_BEMA_FEE_SCHEDULE_DATA',
    sourceName: 'Kassenzahnärztliche Bundesvereinigung BEMA',
    sourceUrl: 'https://www.kzbv.de/bema',
    note: 'External dental self-administration source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-ag-drg-fpv',
    fileName: 'agdrgfpv',
    exportName: 'GERMANY_AGDRGFPV_FEE_SCHEDULE_DATA',
    sourceName: 'InEK aG-DRG Fallpauschalen-Katalog / FPV',
    sourceUrl: 'https://www.g-drg.de',
    note: 'Year-specific InEK catalog source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-uv-goae',
    fileName: 'uvgoae',
    exportName: 'GERMANY_UVGOAE_FEE_SCHEDULE_DATA',
    sourceName: 'Deutsche Gesetzliche Unfallversicherung UV-GOÄ',
    sourceUrl: 'https://www.dguv.de',
    note: 'External accident insurance fee schedule source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-heilmittel-gkv',
    fileName: 'heilmittelgkv',
    exportName: 'GERMANY_HEILMITTELGKV_FEE_SCHEDULE_DATA',
    sourceName: 'GKV-Spitzenverband Heilmittel contracts and price lists',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/heilmittel/heilmittel.jsp',
    note: 'External contract and price list source by remedy area; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-hebammenhilfevertrag',
    fileName: 'hebammenhilfevertrag',
    exportName: 'GERMANY_HEBAMMENHILFEVERTRAG_FEE_SCHEDULE_DATA',
    sourceName: 'GKV-Spitzenverband Hebammenhilfevertrag',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/hebammen_geburtshaeuser/hebammenhilfevertrag/hebammenhilfevertrag.jsp',
    note: 'External contract and remuneration agreement source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-gebueh',
    fileName: 'gebueh',
    exportName: 'GERMANY_GEBUEH_FEE_SCHEDULE_DATA',
    sourceName: 'Gebührenverzeichnis für Heilpraktiker',
    sourceUrl: 'https://www.gebueh.de',
    note: 'Non-official customary fee directory; not part of gesetze-im-internet federal XML corpus.',
  },
];

const allSources = [...federalSources, ...externalSources];

fs.mkdirSync(sourceRoot, { recursive: true });
fs.mkdirSync(outDir, { recursive: true });

// Named entities this script decodes. A non-breaking space becomes a regular
// space so that downstream whitespace handling sees an ordinary blank.
const NAMED_ENTITIES = new Map([
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['sect', '§'],
  ['Auml', 'Ä'],
  ['Ouml', 'Ö'],
  ['Uuml', 'Ü'],
  ['auml', 'ä'],
  ['ouml', 'ö'],
  ['uuml', 'ü'],
  ['szlig', 'ß'],
]);

/**
 * Decodes the named entities listed in NAMED_ENTITIES plus decimal and
 * hexadecimal character references.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again (`&amp;lt;` becomes `&lt;`, not `<`). Unknown or invalid
 * references are left untouched.
 *
 * @param {string} input - Text that may contain entity references.
 * @returns {string} The text with recognised references replaced.
 */
const decodeEntities = (input) => input.replace(
  /&(#\d+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);/g,
  (entity, body) => {
    if (!body.startsWith('#')) {
      return NAMED_ENTITIES.get(body) ?? entity;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    if (codePoint === 0xa0) {
      return ' ';
    }
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) {
      return entity;
    }
    return String.fromCodePoint(codePoint);
  },
);

/**
 * Reduces an XML fragment to plain text: markup is replaced by blanks,
 * entities are decoded, and runs of whitespace collapse to one space.
 *
 * Markup is removed BEFORE entities are decoded so that escaped characters in
 * the text (for example `&lt;`) cannot be mistaken for tags and stripped.
 *
 * @param {string} input - Raw XML fragment.
 * @returns {string} Trimmed plain text.
 */
const cleanXmlText = (input) => {
  const withoutMarkup = input
    // NOTE(review): the first replacement pattern was unreadable in the
    // reviewed source; comments are dropped here, and line-break elements are
    // covered by the generic tag pattern below — confirm against the original.
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/<[^>]+>/g, ' ');
  return decodeEntities(withoutMarkup)
    .replace(/\s+/g, ' ')
    .trim();
};

/**
 * Returns the cleaned text content of the first `<tag>…</tag>` element.
 *
 * @param {string} xml - XML fragment to search.
 * @param {string} tag - Element name; interpolated into a RegExp unescaped.
 * @returns {string | undefined} Cleaned text, or undefined when no element matches.
 */
const extractFirst = (xml, tag) => {
  const match = xml.match(new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${tag}>`));
  return match ? cleanXmlText(match[1]) : undefined;
};

/** True when the value has the shape of a fee code such as `1234a`, `A12` or `GO 5`. */
const isCode = (value) => /^(?:\d{1,6}[a-z]?|[A-Z]\d{1,5}|[A-Z]{1,3}\s?\d{1,5})$/.test(value);

/** True when the value contains a digit and consists only of digits, blanks, `.`, `,`, `€` and dashes. */
const hasNumericValue = (value) => /\d/.test(value) && /^[\d\s.,€–\-]+$/.test(value);

/**
 * Parses the LAST number in the text, reading `.` as thousands separator and
 * `,` as decimal separator.
 *
 * @param {string} value - Cell text such as `1.234,56 €`.
 * @returns {number | undefined} The parsed number, or undefined when the text has no digits.
 */
const parseAmount = (value) => {
  const numeric = value.match(/\d+(?:\.\d{3})*(?:,\d+)?/g)?.at(-1);
  if (!numeric) {
    return undefined;
  }
  return Number(numeric.replace(/\./g, '').replace(',', '.'));
};

/**
 * Classifies a table row for the given row mode.
 *
 * @param {string[]} cells - Cleaned, non-empty cell texts of the row.
 * @param {string} mode - The source's `rowMode`.
 * @returns {'fee-entry' | 'fee-table-row' | undefined} Row type, or undefined to skip the row.
 */
const getRowType = (cells, mode) => {
  if (mode === 'code' && isCode(cells[0])) {
    return 'fee-entry';
  }
  if (mode === 'table' && cells.length >= 2 && cells.filter(hasNumericValue).length >= 2) {
    return 'fee-table-row';
  }
  return undefined;
};

/**
 * Writes `<outDir>/<fileName>.ts` exporting `data` as a typed constant.
 *
 * @param {string} fileName - Module name without extension.
 * @param {string} exportName - Name of the exported constant.
 * @param {object} data - Value serialised with JSON.stringify.
 */
const writeGeneratedTsFile = (fileName, exportName, data) => {
  const filePath = path.join(outDir, `${fileName}.ts`);
  const body = [
    `// Generated by scripts/generate-germany.mjs on ${generatedAt}.`,
    '// Do not edit this file manually.',
    "import type { IFeeScheduleData } from '../feeschedules.types.js';",
    '',
    `export const ${exportName}: IFeeScheduleData = ${JSON.stringify(data, null, 2)};`,
    '',
  ].join('\n');
  fs.writeFileSync(filePath, body);
};

/**
 * Downloads and unpacks the `xml.zip` of one federal source.
 *
 * @param {{ sourcePath: string }} source - Entry of `federalSources`.
 * @returns {Promise<string>} Path of the single extracted XML file.
 * @throws {Error} When the download fails, unzip fails, or the archive does
 *   not contain exactly one top-level `.xml` file.
 */
const downloadFederalXml = async (source) => {
  const zipUrl = `https://www.gesetze-im-internet.de/${source.sourcePath}/xml.zip`;
  const zipPath = path.join(sourceRoot, `${source.sourcePath}.zip`);
  const extractDir = path.join(sourceRoot, source.sourcePath);

  const response = await fetch(zipUrl);
  if (!response.ok) {
    throw new Error(`Failed to download ${zipUrl}: ${response.status} ${response.statusText}`);
  }
  fs.writeFileSync(zipPath, Buffer.from(await response.arrayBuffer()));

  // Start from an empty directory so an XML file left over from an earlier
  // run cannot trip the "exactly one XML file" check below.
  fs.rmSync(extractDir, { recursive: true, force: true });
  fs.mkdirSync(extractDir, { recursive: true });

  const unzip = spawnSync('/usr/bin/unzip', ['-o', '-q', zipPath, '-d', extractDir], {
    stdio: 'pipe',
    encoding: 'utf8',
  });
  if (unzip.status !== 0) {
    // status is null when the process could not be spawned; the reason is
    // then only available on `unzip.error`.
    const reason = unzip.error?.message || unzip.stderr || unzip.stdout;
    throw new Error(`Failed to unzip ${zipPath}: ${reason}`, { cause: unzip.error });
  }

  const xmlFiles = fs.readdirSync(extractDir).filter((entry) => entry.endsWith('.xml'));
  if (xmlFiles.length !== 1) {
    throw new Error(`Expected one XML file for ${source.sourcePath}, found ${xmlFiles.length}`);
  }
  return path.join(extractDir, xmlFiles[0]);
};

/**
 * Extracts document-level metadata from the first norm of a file.
 *
 * @param {string} firstNorm - XML of the first norm element (may be empty).
 * @returns {{ officialAbbreviation?: string, legalAbbreviation?: string,
 *   title?: string, issuedAt?: string, statusNotes: string[] }}
 */
const getMetadata = (firstNorm) => {
  // NOTE(review): opening tag reconstructed from the surviving closing tag.
  const statusNotes = [...firstNorm.matchAll(/<standkommentar>([\s\S]*?)<\/standkommentar>/g)]
    .map((match) => cleanXmlText(match[1]));
  return {
    officialAbbreviation: extractFirst(firstNorm, 'amtabk'),
    legalAbbreviation: extractFirst(firstNorm, 'jurabk'),
    title: extractFirst(firstNorm, 'langue'),
    issuedAt: extractFirst(firstNorm, 'ausfertigung-datum'),
    statusNotes,
  };
};

/**
 * Builds one rule section per norm that has non-empty content text.
 * Section ids are numbered by the norm's position in the file, so norms
 * without text leave gaps in the numbering.
 */
const extractRuleSections = (xml, source, metadata) => {
  // NOTE(review): leading `<norm[^>` reconstructed from the surviving tail of the pattern.
  const norms = [...xml.matchAll(/<norm[^>]*doknr="([^"]+)"[^>]*>([\s\S]*?)<\/norm>/g)];
  return norms
    .map(([, sourceNormId, normXml], index) => {
      // NOTE(review): opening tag reconstructed from the surviving closing tag.
      const contentMatch = normXml.match(/<Content>([\s\S]*?)<\/Content>/);
      return {
        id: `${source.scheduleId}-section-${index + 1}`,
        scheduleId: source.scheduleId,
        sourceNormId,
        reference: extractFirst(normXml, 'enbez') || metadata.officialAbbreviation || source.scheduleId,
        title: extractFirst(normXml, 'titel'),
        text: contentMatch ? cleanXmlText(contentMatch[1]) : '',
      };
    })
    .filter((section) => section.text);
};

/** Builds fee rows from every table row that getRowType() accepts for the source's row mode. */
const extractFeeRows = (xml, source) => {
  const feeRows = [];
  // NOTE(review): row pattern reconstructed (it was empty in the reviewed
  // source); assumes table rows are `<row>…</row>` elements — confirm.
  for (const rowMatch of xml.matchAll(/<row[\s\S]*?<\/row>/g)) {
    const cells = [...rowMatch[0].matchAll(/<entry[^>]*>([\s\S]*?)<\/entry>/g)]
      .map((entryMatch) => cleanXmlText(entryMatch[1]))
      .filter(Boolean);
    if (cells.length < 2) {
      continue;
    }
    const rowType = getRowType(cells, source.rowMode);
    if (!rowType) {
      continue;
    }

    const isFeeEntry = rowType === 'fee-entry';
    const rowIndex = feeRows.length + 1;
    const thirdCell = cells[2];
    const lastCell = cells.at(-1) || '';
    // The third cell is taken as a points value only when it is a bare integer.
    const points = isFeeEntry && thirdCell && /^\d+$/.test(thirdCell)
      ? Number(thirdCell)
      : undefined;
    // The last cell is parsed as an amount when it carries a euro sign, or
    // when a fee entry has at least four cells.
    const hasAmount = lastCell.includes('€') || (isFeeEntry && cells.length >= 4);

    feeRows.push({
      id: `${source.scheduleId}-row-${rowIndex}`,
      scheduleId: source.scheduleId,
      rowType,
      rowIndex,
      code: isFeeEntry ? cells[0] : undefined,
      description: isFeeEntry ? cells[1] : undefined,
      cells,
      points,
      amountEur: hasAmount ? parseAmount(lastCell) : undefined,
      sourceNormId: undefined,
    });
  }
  return feeRows;
};

/**
 * Downloads one federal source and converts it into fee schedule data.
 *
 * @param {object} source - Entry of `federalSources`.
 * @returns {Promise<object>} Data object written to the generated module.
 */
const parseFederalSource = async (source) => {
  const xmlPath = await downloadFederalXml(source);
  const xml = fs.readFileSync(xmlPath, 'utf8');
  const sourceFileName = path.basename(xmlPath);
  // NOTE(review): pattern reconstructed (it was empty in the reviewed source).
  const firstNorm = xml.match(/<norm[\s\S]*?<\/norm>/)?.[0] || '';
  const metadata = getMetadata(firstNorm);
  const ruleSections = extractRuleSections(xml, source, metadata);
  const feeRows = extractFeeRows(xml, source);

  return {
    scheduleId: source.scheduleId,
    dataStatus: feeRows.length ? 'federal-law-fee-data' : 'federal-law-rules-only',
    edition: metadata.statusNotes.join(' ') || `Retrieved ${generatedAt}`,
    source: {
      name: 'Gesetze im Internet',
      url: `https://www.gesetze-im-internet.de/${source.sourcePath}/xml.zip`,
      pageUrl: `https://www.gesetze-im-internet.de/${source.sourcePath}/`,
      retrievedAt: generatedAt,
      sourceFileName,
      officialAbbreviation: metadata.officialAbbreviation,
      legalAbbreviation: metadata.legalAbbreviation,
      title: metadata.title,
      issuedAt: metadata.issuedAt,
      statusNotes: metadata.statusNotes,
    },
    feeRows,
    ruleSections,
    notes: feeRows.length
      ? ['Generated from official federal XML table rows. Original row cells are preserved in order.']
      : ['No separate fee table rows were present in the federal XML. Fee rules are represented as rule sections.'],
  };
};

/** Builds the placeholder data object for a source that is not downloaded by this script. */
const getExternalSourceData = (source) => ({
  scheduleId: source.scheduleId,
  dataStatus: 'external-source-pending',
  edition: `Source identified ${generatedAt}`,
  source: {
    name: source.sourceName,
    url: source.sourceUrl,
    retrievedAt: generatedAt,
  },
  feeRows: [],
  ruleSections: [],
  notes: [source.note],
});

// Remove previously generated modules before regenerating them.
for (const source of allSources) {
  fs.rmSync(path.join(outDir, `${source.fileName}.ts`), { force: true });
}

const generatedData = [];

// Federal sources are processed one after another (one download at a time).
for (const source of federalSources) {
  const data = await parseFederalSource(source);
  writeGeneratedTsFile(source.fileName, source.exportName, data);
  generatedData.push({ ...source, data });
  console.log(`${source.fileName}.ts: ${data.dataStatus}, rows=${data.feeRows.length}, sections=${data.ruleSections.length}`);
}

for (const source of externalSources) {
  const data = getExternalSourceData(source);
  writeGeneratedTsFile(source.fileName, source.exportName, data);
  generatedData.push({ ...source, data });
  console.log(`${source.fileName}.ts: ${data.dataStatus}, rows=0, sections=0`);
}

// index.ts both imports each constant (for the aggregate array) and re-exports it.
const indexImports = generatedData
  .map((source) => `import { ${source.exportName} } from './${source.fileName}.js';`)
  .join('\n');
const indexExports = generatedData
  .map((source) => `export { ${source.exportName} } from './${source.fileName}.js';`)
  .join('\n');
const indexBody = [
  `// Generated by scripts/generate-germany.mjs on ${generatedAt}.`,
  '// Do not edit this file manually.',
  "import type { IFeeScheduleData } from '../feeschedules.types.js';",
  indexImports,
  '',
  indexExports,
  '',
  'export const GERMANY_FEE_SCHEDULE_DATA: IFeeScheduleData[] = [',
  ...generatedData.map((source) => ` ${source.exportName},`),
  '];',
  '',
].join('\n');

fs.writeFileSync(path.join(outDir, 'index.ts'), indexBody);