// Source: fee-schedules/scripts/generate-germany.mjs
// Snapshot metadata: 2026-05-14 10:05:17 +00:00, 352 lines, 13 KiB, JavaScript

import { spawnSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Directory that contains this script; the package root is its parent.
const scriptDirectory = path.dirname(fileURLToPath(import.meta.url));
const packageRoot = path.resolve(scriptDirectory, '..');
// Working directory for downloaded archives and their extracted XML.
const sourceRoot = path.join(packageRoot, '.nogit', 'fee-schedules-sources');
// Destination directory for the generated TypeScript modules.
const outDir = path.join(packageRoot, 'ts', 'germany');
// Date portion (YYYY-MM-DD) of the current ISO timestamp; stamped into generated output.
const generatedAt = new Date().toISOString().substring(0, 10);
// Federal schedules fetched from gesetze-im-internet.de, listed as
// [fileName, sourcePath, rowMode]. The schedule id and the export name are
// derived from the file name. rowMode is 'code', 'table' or 'none' and is
// passed to the row classifier when the XML table rows are parsed.
const federalSources = [
  ['goae', 'go__1982', 'code'],
  ['goz', 'goz_1987', 'code'],
  ['gop', 'gop', 'none'],
  ['got', 'got_2022', 'code'],
  ['stbvv', 'stbgebv', 'table'],
  ['rvg', 'rvg', 'code'],
  ['gnotkg', 'gnotkg', 'code'],
  ['gkg', 'gkg_2004', 'code'],
  ['famgkg', 'famgkg', 'code'],
  ['gvkostg', 'gvkostg', 'code'],
  ['jveg', 'jveg', 'code'],
  ['insvv', 'insvv', 'none'],
  ['hoai', 'hoai_2013', 'table'],
  ['ampreisv', 'ampreisv', 'none'],
].map(([fileName, sourcePath, rowMode]) => ({
  scheduleId: `de-${fileName}`,
  fileName,
  exportName: `GERMANY_${fileName.toUpperCase()}_FEE_SCHEDULE_DATA`,
  sourcePath,
  rowMode,
}));
// Schedules whose data is not downloaded by this script; only the source
// reference and an explanatory note are recorded for them. The output file name
// is the schedule id without its 'de-' prefix and without hyphens, and the
// export name is derived from that file name.
const externalSources = [
  {
    scheduleId: 'de-ebm',
    sourceName: 'Kassenärztliche Bundesvereinigung EBM',
    sourceUrl: 'https://www.kbv.de/html/ebm.php',
    note: 'External self-administration source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-bema',
    sourceName: 'Kassenzahnärztliche Bundesvereinigung BEMA',
    sourceUrl: 'https://www.kzbv.de/bema',
    note: 'External dental self-administration source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-ag-drg-fpv',
    sourceName: 'InEK aG-DRG Fallpauschalen-Katalog / FPV',
    sourceUrl: 'https://www.g-drg.de',
    note: 'Year-specific InEK catalog source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-uv-goae',
    sourceName: 'Deutsche Gesetzliche Unfallversicherung UV-GOÄ',
    sourceUrl: 'https://www.dguv.de',
    note: 'External accident insurance fee schedule source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-heilmittel-gkv',
    sourceName: 'GKV-Spitzenverband Heilmittel contracts and price lists',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/heilmittel/heilmittel.jsp',
    note: 'External contract and price list source by remedy area; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-hebammenhilfevertrag',
    sourceName: 'GKV-Spitzenverband Hebammenhilfevertrag',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/hebammen_geburtshaeuser/hebammenhilfevertrag/hebammenhilfevertrag.jsp',
    note: 'External contract and remuneration agreement source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-gebueh',
    sourceName: 'Gebührenverzeichnis für Heilpraktiker',
    sourceUrl: 'https://www.gebueh.de',
    note: 'Non-official customary fee directory; not part of gesetze-im-internet federal XML corpus.',
  },
].map(({ scheduleId, sourceName, sourceUrl, note }) => {
  const fileName = scheduleId.slice('de-'.length).split('-').join('');
  return {
    scheduleId,
    fileName,
    exportName: `GERMANY_${fileName.toUpperCase()}_FEE_SCHEDULE_DATA`,
    sourceName,
    sourceUrl,
    note,
  };
});
// Every schedule handled by this script: federal sources first, then external ones.
const allSources = federalSources.concat(externalSources);
// Ensure the download working directory and the output directory both exist.
for (const requiredDirectory of [sourceRoot, outDir]) {
  fs.mkdirSync(requiredDirectory, { recursive: true });
}
// Named character references understood by decodeEntities. Any other name is
// left in the text unchanged. Non-breaking spaces are mapped to a plain space.
const namedEntities = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
  ['sect', '§'],
  ['euro', '€'],
  ['Auml', 'Ä'],
  ['Ouml', 'Ö'],
  ['Uuml', 'Ü'],
  ['auml', 'ä'],
  ['ouml', 'ö'],
  ['uuml', 'ü'],
  ['szlig', 'ß'],
]);

/**
 * Decodes character references in a string.
 *
 * Handles the named references listed in `namedEntities` as well as any decimal
 * (`&#167;`) or hexadecimal (`&#x20AC;`) numeric reference. All references are
 * resolved in a single pass, so the output of one replacement is never decoded
 * a second time (`&amp;lt;` yields the text `&lt;`, not `<`).
 *
 * @param {string} input - Text that may contain character references.
 * @returns {string} The text with recognised references replaced; unknown names
 *   and out-of-range code points are left as they were.
 */
const decodeEntities = (input) => input
  // Literal non-breaking spaces are normalised to plain spaces as well.
  .replace(/\u00a0/g, ' ')
  .replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([A-Za-z][A-Za-z0-9]*));/g, (entity, decimal, hex, name) => {
    if (name !== undefined) {
      return namedEntities.get(name) ?? entity;
    }
    const codePoint = decimal !== undefined
      ? Number.parseInt(decimal, 10)
      : Number.parseInt(hex, 16);
    // String.fromCodePoint throws for values beyond U+10FFFF; keep such input verbatim.
    if (!(codePoint > 0 && codePoint <= 0x10ffff)) {
      return entity;
    }
    return codePoint === 0xa0 ? ' ' : String.fromCodePoint(codePoint);
  });

/**
 * Reduces an XML fragment to plain text: markup is replaced by spaces, character
 * references are decoded, and runs of whitespace are collapsed.
 *
 * Tags are stripped before references are decoded so that escaped angle brackets
 * in the text (`&lt;` / `&gt;`) cannot be mistaken for markup and removed.
 *
 * @param {string} input - XML fragment.
 * @returns {string} Trimmed plain text with single spaces between words.
 */
const cleanXmlText = (input) => {
  const withoutMarkup = input
    .replace(/<BR\s*\/?\s*>/gi, ' ')
    .replace(/<[^>]+>/g, ' ');
  return decodeEntities(withoutMarkup)
    .replace(/\s+/g, ' ')
    .trim();
};
/**
 * Returns the cleaned text content of the first `<tag>…</tag>` element found in
 * `xml`. The opening tag may carry attributes.
 *
 * @param {string} xml - XML text to search.
 * @param {string} tag - Element name; it is inserted into a regular expression as-is.
 * @returns {string | undefined} The element's text, or `undefined` when no such
 *   element is present.
 */
const extractFirst = (xml, tag) => {
  const elementPattern = new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${tag}>`);
  const found = xml.match(elementPattern);
  if (!found) {
    return undefined;
  }
  const [, innerXml] = found;
  return cleanXmlText(innerXml);
};
/**
 * Tells whether a cell value has the shape of a fee code. Accepted shapes:
 *  - 1-6 digits, optionally followed by one lowercase letter;
 *  - one uppercase letter followed by 1-5 digits;
 *  - 1-3 uppercase letters, an optional single whitespace character, then 1-5 digits.
 *
 * @param {string} value - Cell text to test.
 * @returns {boolean} `true` when the whole value matches one of the shapes.
 */
const isCode = (value) => {
  const codeShapes = [
    /^\d{1,6}[a-z]?$/,
    /^[A-Z]\d{1,5}$/,
    /^[A-Z]{1,3}\s?\d{1,5}$/,
  ];
  return codeShapes.some((shape) => shape.test(value));
};
/**
 * Tells whether a cell value looks like a bare number or amount: it must contain
 * at least one digit and consist only of digits, whitespace, `.`, `,`, `€`, `–`
 * and `-`.
 *
 * @param {string} value - Cell text to test.
 * @returns {boolean} `true` when both conditions hold.
 */
const hasNumericValue = (value) => {
  if (!/\d/.test(value)) {
    return false;
  }
  return /^[\d\s.,€–\-]+$/.test(value);
};
/**
 * Extracts the last number written in German notation (`.` as thousands
 * separator, `,` as decimal separator) from a string.
 *
 * @param {string} value - Text such as `'1.234,56 €'`.
 * @returns {number | undefined} The parsed number, or `undefined` when the text
 *   contains no digits.
 */
const parseAmount = (value) => {
  const numberTokens = value.match(/\d+(?:\.\d{3})*(?:,\d+)?|\d+(?:,\d+)?/g);
  const lastToken = numberTokens?.at(-1);
  if (!lastToken) {
    return undefined;
  }
  // Drop the thousands separators, then turn the decimal comma into a point.
  const withoutThousands = lastToken.split('.').join('');
  const normalized = withoutThousands.replace(',', '.');
  return Number(normalized);
};
/**
 * Classifies a table row according to the parsing mode of its schedule.
 *
 * @param {string[]} cells - Cleaned cell texts of the row.
 * @param {string} mode - Row mode of the schedule.
 * @returns {'fee-entry' | 'fee-table-row' | undefined}
 *   `'fee-entry'` in `'code'` mode when the first cell is shaped like a fee code;
 *   `'fee-table-row'` in `'table'` mode when at least two cells hold numeric values;
 *   `undefined` for every other row and for any other mode.
 */
const getRowType = (cells, mode) => {
  switch (mode) {
    case 'code':
      return isCode(cells[0]) ? 'fee-entry' : undefined;
    case 'table': {
      if (cells.length < 2) {
        return undefined;
      }
      const numericCellCount = cells.filter(hasNumericValue).length;
      return numericCellCount >= 2 ? 'fee-table-row' : undefined;
    }
    default:
      return undefined;
  }
};
/**
 * Writes one generated TypeScript module into the output directory. The module
 * exports `data` as a typed constant serialized as pretty-printed JSON.
 *
 * @param {string} fileName - Base name of the module, without the `.ts` extension.
 * @param {string} exportName - Name of the exported constant.
 * @param {object} data - Schedule data to serialize.
 * @returns {void}
 */
const writeGeneratedTsFile = (fileName, exportName, data) => {
  const serializedData = JSON.stringify(data, null, 2);
  const moduleLines = [
    `// Generated by scripts/generate-germany.mjs on ${generatedAt}.`,
    '// Do not edit this file manually.',
    "import type { IFeeScheduleData } from '../feeschedules.types.js';",
    '',
    `export const ${exportName}: IFeeScheduleData = ${serializedData};`,
  ];
  // The file ends with exactly one trailing newline.
  const contents = `${moduleLines.join('\n')}\n`;
  fs.writeFileSync(path.join(outDir, `${fileName}.ts`), contents);
};
/**
 * Downloads the XML archive of one federal source, extracts it, and returns the
 * path of the single XML file it contains.
 *
 * @param {{ sourcePath: string }} source - Federal source descriptor; `sourcePath`
 *   selects both the download URL and the local file/directory names.
 * @returns {Promise<string>} Absolute path of the extracted XML file.
 * @throws {Error} When the download fails, when `unzip` cannot be started or
 *   exits with a non-zero status, or when the archive does not yield exactly one
 *   XML file.
 */
const downloadFederalXml = async (source) => {
  const zipUrl = `https://www.gesetze-im-internet.de/${source.sourcePath}/xml.zip`;
  const zipPath = path.join(sourceRoot, `${source.sourcePath}.zip`);
  const extractDir = path.join(sourceRoot, source.sourcePath);
  fs.mkdirSync(extractDir, { recursive: true });
  const response = await fetch(zipUrl);
  if (!response.ok) {
    throw new Error(`Failed to download ${zipUrl}: ${response.status} ${response.statusText}`);
  }
  fs.writeFileSync(zipPath, Buffer.from(await response.arrayBuffer()));
  // Remove XML files left over from an earlier run. If the archive's file name
  // has changed since then, a stale file would otherwise make the
  // "exactly one XML file" check below fail.
  for (const staleEntry of fs.readdirSync(extractDir)) {
    if (staleEntry.endsWith('.xml')) {
      fs.rmSync(path.join(extractDir, staleEntry), { recursive: true, force: true });
    }
  }
  const unzip = spawnSync('/usr/bin/unzip', ['-o', '-q', zipPath, '-d', extractDir], {
    stdio: 'pipe',
    encoding: 'utf8',
  });
  // When the process cannot be spawned at all, `status` is null and there is no
  // captured output; the reason is only available on `error`.
  if (unzip.error) {
    throw new Error(`Failed to run unzip for ${zipPath}: ${unzip.error.message}`, { cause: unzip.error });
  }
  if (unzip.status !== 0) {
    throw new Error(`Failed to unzip ${zipPath}: ${unzip.stderr || unzip.stdout}`);
  }
  const xmlFiles = fs.readdirSync(extractDir).filter((entry) => entry.endsWith('.xml'));
  if (xmlFiles.length !== 1) {
    throw new Error(`Expected one XML file for ${source.sourcePath}, found ${xmlFiles.length}`);
  }
  return path.join(extractDir, xmlFiles[0]);
};
/**
 * Collects descriptive metadata from the XML of a document's first `<norm>` element.
 *
 * @param {string} firstNorm - XML text of the first norm (may be empty).
 * @returns {{
 *   officialAbbreviation: string | undefined,
 *   legalAbbreviation: string | undefined,
 *   title: string | undefined,
 *   issuedAt: string | undefined,
 *   statusNotes: string[],
 * }} Text of the `amtabk`, `jurabk`, `langue` and `ausfertigung-datum` elements
 *   (each `undefined` when absent) and the text of every `standkommentar` element.
 */
const getMetadata = (firstNorm) => {
  const statusNotes = Array.from(
    firstNorm.matchAll(/<standkommentar>([\s\S]*?)<\/standkommentar>/g),
    ([, commentXml]) => cleanXmlText(commentXml),
  );
  const readField = (tag) => extractFirst(firstNorm, tag);
  return {
    officialAbbreviation: readField('amtabk'),
    legalAbbreviation: readField('jurabk'),
    title: readField('langue'),
    issuedAt: readField('ausfertigung-datum'),
    statusNotes,
  };
};
/**
 * Downloads one federal source and converts its XML into schedule data.
 *
 * Two things are extracted from the XML:
 *  - rule sections: one per `<norm>` element that carries a `doknr` attribute and
 *    has non-empty `<Content>` text;
 *  - fee rows: `<row>` elements anywhere in the document that have at least two
 *    non-empty cells and are accepted by `getRowType` for the source's row mode.
 *
 * @param {{ scheduleId: string, sourcePath: string, rowMode: string }} source -
 *   Federal source descriptor.
 * @returns {Promise<object>} Schedule data holding source metadata, `feeRows`,
 *   `ruleSections` and explanatory `notes`.
 */
const parseFederalSource = async (source) => {
  const xmlPath = await downloadFederalXml(source);
  const xml = fs.readFileSync(xmlPath, 'utf8');
  const sourceFileName = path.basename(xmlPath);
  const { scheduleId, sourcePath, rowMode } = source;

  // Document-level metadata is read from the first norm element.
  const [firstNorm = ''] = xml.match(/<norm[\s\S]*?<\/norm>/) ?? [];
  const metadata = getMetadata(firstNorm);

  // Rule sections. Section ids number every matched norm, including norms that
  // are skipped afterwards because they have no content text.
  const ruleSections = [];
  let normNumber = 0;
  for (const [, sourceNormId, normXml] of xml.matchAll(/<norm\b[^>]*doknr="([^"]+)"[^>]*>([\s\S]*?)<\/norm>/g)) {
    normNumber += 1;
    const reference = extractFirst(normXml, 'enbez') || metadata.officialAbbreviation || scheduleId;
    const title = extractFirst(normXml, 'titel');
    const contentXml = normXml.match(/<Content>([\s\S]*?)<\/Content>/)?.[1];
    const text = contentXml === undefined ? '' : cleanXmlText(contentXml);
    if (text) {
      ruleSections.push({
        id: `${scheduleId}-section-${normNumber}`,
        scheduleId,
        sourceNormId,
        reference,
        title,
        text,
      });
    }
  }

  // Fee rows. Empty cells are dropped before the row is classified.
  const feeRows = [];
  for (const [rowXml] of xml.matchAll(/<row[\s\S]*?<\/row>/g)) {
    const cells = Array.from(
      rowXml.matchAll(/<entry[^>]*>([\s\S]*?)<\/entry>/g),
      ([, entryXml]) => cleanXmlText(entryXml),
    ).filter((cell) => cell.length > 0);
    if (cells.length < 2) {
      continue;
    }
    const rowType = getRowType(cells, rowMode);
    if (!rowType) {
      continue;
    }
    // Row numbers count accepted rows only, starting at 1.
    const rowIndex = feeRows.length + 1;
    const isFeeEntry = rowType === 'fee-entry';
    const [firstCell, secondCell, thirdCell] = cells;
    const lastCell = cells.at(-1) || '';

    // Points: third cell of a fee entry, when it consists of digits only.
    let points;
    if (isFeeEntry && thirdCell && /^\d+$/.test(thirdCell)) {
      points = Number(thirdCell);
    }
    // Amount: taken from the last cell when it shows a euro sign, or when a fee
    // entry has at least four cells.
    let amountEur;
    if (lastCell.includes('€') || (isFeeEntry && cells.length >= 4)) {
      amountEur = parseAmount(lastCell);
    }

    feeRows.push({
      id: `${scheduleId}-row-${rowIndex}`,
      scheduleId,
      rowType,
      rowIndex,
      code: isFeeEntry ? firstCell : undefined,
      description: isFeeEntry ? secondCell : undefined,
      cells,
      points,
      amountEur,
      // Rows are matched document-wide, so no enclosing norm is recorded.
      sourceNormId: undefined,
    });
  }

  const hasFeeRows = feeRows.length > 0;
  const pageUrl = `https://www.gesetze-im-internet.de/${sourcePath}/`;
  const { officialAbbreviation, legalAbbreviation, title, issuedAt, statusNotes } = metadata;
  return {
    scheduleId,
    dataStatus: hasFeeRows ? 'federal-law-fee-data' : 'federal-law-rules-only',
    edition: statusNotes.join(' ') || `Retrieved ${generatedAt}`,
    source: {
      name: 'Gesetze im Internet',
      url: `${pageUrl}xml.zip`,
      pageUrl,
      retrievedAt: generatedAt,
      sourceFileName,
      officialAbbreviation,
      legalAbbreviation,
      title,
      issuedAt,
      statusNotes,
    },
    feeRows,
    ruleSections,
    notes: hasFeeRows
      ? ['Generated from official federal XML table rows. Original row cells are preserved in order.']
      : ['No separate fee table rows were present in the federal XML. Fee rules are represented as rule sections.'],
  };
};
/**
 * Builds the placeholder schedule data for an external source: it records where
 * the data can be found and carries no fee rows or rule sections.
 *
 * @param {{ scheduleId: string, sourceName: string, sourceUrl: string, note: string }} source -
 *   External source descriptor.
 * @returns {object} Schedule data with status `'external-source-pending'`.
 */
const getExternalSourceData = (source) => {
  const { scheduleId, sourceName, sourceUrl, note } = source;
  const sourceReference = {
    name: sourceName,
    url: sourceUrl,
    retrievedAt: generatedAt,
  };
  return {
    scheduleId,
    dataStatus: 'external-source-pending',
    edition: `Source identified ${generatedAt}`,
    source: sourceReference,
    feeRows: [],
    ruleSections: [],
    notes: [note],
  };
};
// Delete the module of every known schedule that already exists in the output
// directory, so the files written below always come from this run.
allSources
  .map(({ fileName }) => path.join(outDir, `${fileName}.ts`))
  .filter((candidatePath) => fs.existsSync(candidatePath))
  .forEach((stalePath) => {
    fs.rmSync(stalePath);
  });
// Each processed source together with its data, in the order the modules are written.
const generatedData = [];
// Writes the module for one schedule and records it for the index file.
const storeSchedule = (source, data) => {
  writeGeneratedTsFile(source.fileName, source.exportName, data);
  generatedData.push({ ...source, data });
};
// Federal sources are downloaded and parsed one after another.
for (const federalSource of federalSources) {
  const federalData = await parseFederalSource(federalSource);
  storeSchedule(federalSource, federalData);
  const { dataStatus, feeRows, ruleSections } = federalData;
  console.log(`${federalSource.fileName}.ts: ${dataStatus}, rows=${feeRows.length}, sections=${ruleSections.length}`);
}
// External sources only get a placeholder module.
for (const externalSource of externalSources) {
  const externalData = getExternalSourceData(externalSource);
  storeSchedule(externalSource, externalData);
  console.log(`${externalSource.fileName}.ts: ${externalData.dataStatus}, rows=0, sections=0`);
}
// Build the three per-schedule line groups of index.ts in a single pass: the
// imports, the re-exports, and the entries of the combined array.
const importLines = [];
const exportLines = [];
const registryLines = [];
for (const { exportName, fileName } of generatedData) {
  importLines.push(`import { ${exportName} } from './${fileName}.js';`);
  exportLines.push(`export { ${exportName} } from './${fileName}.js';`);
  registryLines.push(` ${exportName},`);
}
const indexImports = importLines.join('\n');
const indexExports = exportLines.join('\n');
// index.ts re-exports every schedule constant and lists all of them in one array.
const indexLines = [
  `// Generated by scripts/generate-germany.mjs on ${generatedAt}.`,
  '// Do not edit this file manually.',
  "import type { IFeeScheduleData } from '../feeschedules.types.js';",
  indexImports,
  '',
  indexExports,
  '',
  'export const GERMANY_FEE_SCHEDULE_DATA: IFeeScheduleData[] = [',
  ...registryLines,
  '];',
];
// The file ends with exactly one trailing newline.
const indexBody = `${indexLines.join('\n')}\n`;
fs.writeFileSync(path.join(outDir, 'index.ts'), indexBody);