// Listing metadata: 352 lines, 13 KiB, JavaScript.
import { spawnSync } from 'node:child_process';
|
||
|
|
import fs from 'node:fs';
|
||
|
|
import path from 'node:path';
|
||
|
|
import { fileURLToPath } from 'node:url';
|
||
|
|
|
||
|
|
// Directory that contains this script; all other paths are derived from it.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));

// Package root: the parent of the script directory.
const packageRoot = path.resolve(scriptDir, '..');

// Where downloaded archives and their extracted contents are kept.
const sourceRoot = path.join(packageRoot, '.nogit', 'fee-schedules-sources');

// Output directory for the generated TypeScript modules.
const outDir = path.join(packageRoot, 'ts', 'germany');

// Generation date as YYYY-MM-DD, cut from the UTC ISO timestamp.
const generatedAt = new Date().toISOString().slice(0, 10);
|
||
|
|
|
||
|
|
/**
 * Federal fee schedules whose XML is downloaded from gesetze-im-internet.de.
 *
 * Each compact row is [fileName, sourcePath, rowMode]. The mapper expands it to
 * { scheduleId, fileName, exportName, sourcePath, rowMode }, where scheduleId
 * is `de-<fileName>` and exportName is `GERMANY_<FILENAME>_FEE_SCHEDULE_DATA`.
 * rowMode ('code' | 'table' | 'none') is passed to getRowType to classify rows.
 */
const federalSources = [
  ['goae', 'go__1982', 'code'],
  ['goz', 'goz_1987', 'code'],
  ['gop', 'gop', 'none'],
  ['got', 'got_2022', 'code'],
  ['stbvv', 'stbgebv', 'table'],
  ['rvg', 'rvg', 'code'],
  ['gnotkg', 'gnotkg', 'code'],
  ['gkg', 'gkg_2004', 'code'],
  ['famgkg', 'famgkg', 'code'],
  ['gvkostg', 'gvkostg', 'code'],
  ['jveg', 'jveg', 'code'],
  ['insvv', 'insvv', 'none'],
  ['hoai', 'hoai_2013', 'table'],
  ['ampreisv', 'ampreisv', 'none'],
].map(([fileName, sourcePath, rowMode]) => ({
  scheduleId: `de-${fileName}`,
  fileName,
  exportName: `GERMANY_${fileName.toUpperCase()}_FEE_SCHEDULE_DATA`,
  sourcePath,
  rowMode,
}));
|
||
|
|
|
||
|
|
/**
 * Fee schedules published outside the gesetze-im-internet federal XML corpus.
 *
 * Rows carry only what differs per schedule. The mapper adds the derived
 * exportName (`GERMANY_<FILENAME>_FEE_SCHEDULE_DATA`) and appends the shared
 * note suffix, producing
 * { scheduleId, fileName, exportName, sourceName, sourceUrl, note }.
 */
const externalSources = [
  {
    scheduleId: 'de-ebm',
    fileName: 'ebm',
    sourceName: 'Kassenärztliche Bundesvereinigung EBM',
    sourceUrl: 'https://www.kbv.de/html/ebm.php',
    noteLead: 'External self-administration source',
  },
  {
    scheduleId: 'de-bema',
    fileName: 'bema',
    sourceName: 'Kassenzahnärztliche Bundesvereinigung BEMA',
    sourceUrl: 'https://www.kzbv.de/bema',
    noteLead: 'External dental self-administration source',
  },
  {
    scheduleId: 'de-ag-drg-fpv',
    fileName: 'agdrgfpv',
    sourceName: 'InEK aG-DRG Fallpauschalen-Katalog / FPV',
    sourceUrl: 'https://www.g-drg.de',
    noteLead: 'Year-specific InEK catalog source',
  },
  {
    scheduleId: 'de-uv-goae',
    fileName: 'uvgoae',
    sourceName: 'Deutsche Gesetzliche Unfallversicherung UV-GOÄ',
    sourceUrl: 'https://www.dguv.de',
    noteLead: 'External accident insurance fee schedule source',
  },
  {
    scheduleId: 'de-heilmittel-gkv',
    fileName: 'heilmittelgkv',
    sourceName: 'GKV-Spitzenverband Heilmittel contracts and price lists',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/heilmittel/heilmittel.jsp',
    noteLead: 'External contract and price list source by remedy area',
  },
  {
    scheduleId: 'de-hebammenhilfevertrag',
    fileName: 'hebammenhilfevertrag',
    sourceName: 'GKV-Spitzenverband Hebammenhilfevertrag',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/hebammen_geburtshaeuser/hebammenhilfevertrag/hebammenhilfevertrag.jsp',
    noteLead: 'External contract and remuneration agreement source',
  },
  {
    scheduleId: 'de-gebueh',
    fileName: 'gebueh',
    sourceName: 'Gebührenverzeichnis für Heilpraktiker',
    sourceUrl: 'https://www.gebueh.de',
    noteLead: 'Non-official customary fee directory',
  },
].map(({ scheduleId, fileName, sourceName, sourceUrl, noteLead }) => ({
  scheduleId,
  fileName,
  exportName: `GERMANY_${fileName.toUpperCase()}_FEE_SCHEDULE_DATA`,
  sourceName,
  sourceUrl,
  note: `${noteLead}; not part of gesetze-im-internet federal XML corpus.`,
}));
|
||
|
|
|
||
|
|
// Federal sources first, then external ones, in declaration order.
const allSources = federalSources.concat(externalSources);

// Make sure the download cache and the output directory both exist.
for (const directory of [sourceRoot, outDir]) {
  fs.mkdirSync(directory, { recursive: true });
}
|
||
|
|
|
||
|
|
// Named entities this script decodes; any other name is left as written.
const NAMED_ENTITIES = new Map([
  ['nbsp', ' '],
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['sect', '§'],
  ['Auml', 'Ä'],
  ['Ouml', 'Ö'],
  ['Uuml', 'Ü'],
  ['auml', 'ä'],
  ['ouml', 'ö'],
  ['uuml', 'ü'],
  ['szlig', 'ß'],
]);

/**
 * Decode character and entity references in a text fragment.
 *
 * Handles the named entities in NAMED_ENTITIES plus decimal (`&#39;`) and
 * hexadecimal (`&#x27;`) character references. Non-breaking spaces
 * (`&nbsp;`, `&#160;`, `&#xA0;`) become a plain space. Unknown names and
 * invalid code points are returned unchanged.
 *
 * The input is scanned once, so text produced by one replacement is never
 * decoded again: `&amp;lt;` yields the literal text `&lt;`, not `<`.
 *
 * @param {string} input - Text that may contain entity references.
 * @returns {string} The text with recognised references replaced.
 */
const decodeEntities = (input) => input.replace(
  /&(#[xX][0-9a-fA-F]+|#\d+|[A-Za-z][A-Za-z0-9]*);/g,
  (reference, body) => {
    if (!body.startsWith('#')) {
      return NAMED_ENTITIES.get(body) ?? reference;
    }

    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    if (codePoint === 0xa0) {
      return ' ';
    }

    // Reject NUL, surrogates and out-of-range values instead of throwing.
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
    return isValid ? String.fromCodePoint(codePoint) : reference;
  },
);
|
||
|
|
|
||
|
|
/**
 * Reduce an XML fragment to single-spaced plain text.
 *
 * Order matters: markup is removed first and entities are decoded afterwards.
 * Decoding first would turn escaped text such as `1 &lt; 2 &gt; 0` into
 * `1 < 2 > 0`, and the tag-stripping pattern would then delete `< 2 >` as if
 * it were an element. Whitespace is collapsed last so that spaces produced by
 * decoding are normalised too.
 *
 * @param {string} input - Raw XML fragment.
 * @returns {string} Trimmed text with tags removed and whitespace collapsed.
 */
const cleanXmlText = (input) => {
  const withoutMarkup = input
    .replace(/<BR\s*\/?\s*>/gi, ' ')
    .replace(/<[^>]+>/g, ' ');

  return decodeEntities(withoutMarkup)
    .replace(/\s+/g, ' ')
    .trim();
};
|
||
|
|
|
||
|
|
/**
 * Return the cleaned text of the first `<tag>…</tag>` element in `xml`.
 *
 * The opening tag may carry attributes. `tag` is interpolated into the
 * pattern as-is, so it must not contain regex metacharacters.
 *
 * @param {string} xml - XML text to search.
 * @param {string} tag - Element name to look for.
 * @returns {string|undefined} Cleaned element content, or undefined when the
 *   element is not found.
 */
const extractFirst = (xml, tag) => {
  const elementPattern = new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${tag}>`);
  const found = xml.match(elementPattern);

  if (!found) {
    return undefined;
  }

  return cleanXmlText(found[1]);
};
|
||
|
|
|
||
|
|
/**
 * Tell whether a cell looks like a fee code.
 *
 * Accepted shapes: 1-6 digits with an optional lowercase letter suffix, one
 * uppercase letter followed by 1-5 digits, or 1-3 uppercase letters, an
 * optional whitespace character, and 1-5 digits.
 *
 * @param {string} value - Cell text.
 * @returns {boolean} True when the whole value matches one of the shapes.
 */
const isCode = (value) => {
  const codePattern = /^(?:\d{1,6}[a-z]?|[A-Z]\d{1,5}|[A-Z]{1,3}\s?\d{1,5})$/;
  return codePattern.test(value);
};
|
||
|
|
/**
 * Tell whether a cell holds only a numeric value.
 *
 * The value must contain at least one digit and consist solely of digits,
 * whitespace, `.`, `,`, `€`, an en dash, or a hyphen.
 *
 * @param {string} value - Cell text.
 * @returns {boolean} True for purely numeric or amount-like cells.
 */
const hasNumericValue = (value) => {
  if (!/\d/.test(value)) {
    return false;
  }

  return /^[\d\s.,€–\-]+$/.test(value);
};
|
||
|
|
|
||
|
|
/**
 * Parse the last German-formatted number in a string.
 *
 * `.` is read as a thousands separator (only when followed by exactly three
 * digits) and `,` as the decimal separator. When the text contains several
 * numbers, the last one is used.
 *
 * @param {string} value - Cell text such as "1.234,56 €".
 * @returns {number|undefined} The parsed number, or undefined when the text
 *   contains no digits.
 */
const parseAmount = (value) => {
  const candidates = value.match(/\d+(?:\.\d{3})*(?:,\d+)?|\d+(?:,\d+)?/g);
  if (candidates === null) {
    return undefined;
  }

  const germanNumber = candidates[candidates.length - 1];
  const normalized = germanNumber.replace(/\./g, '').replace(',', '.');
  return Number(normalized);
};
|
||
|
|
|
||
|
|
/**
 * Classify a table row according to the source's row mode.
 *
 * - 'code': the row is a 'fee-entry' when its first cell passes isCode.
 * - 'table': the row is a 'fee-table-row' when it has at least two cells and
 *   at least two of them pass hasNumericValue.
 * - any other mode: the row is never classified.
 *
 * @param {string[]} cells - Cleaned cell texts of one row.
 * @param {string} mode - Row mode of the source.
 * @returns {'fee-entry'|'fee-table-row'|undefined} Row type, or undefined when
 *   the row does not qualify.
 */
const getRowType = (cells, mode) => {
  switch (mode) {
    case 'code':
      return isCode(cells[0]) ? 'fee-entry' : undefined;

    case 'table': {
      if (cells.length < 2) {
        return undefined;
      }
      const numericCells = cells.filter(hasNumericValue);
      return numericCells.length >= 2 ? 'fee-table-row' : undefined;
    }

    default:
      return undefined;
  }
};
|
||
|
|
|
||
|
|
/**
 * Write one generated TypeScript module into the output directory.
 *
 * The file holds a generated-file banner, a type-only import of
 * IFeeScheduleData, and a single exported constant initialised with the
 * pretty-printed JSON of `data`.
 *
 * @param {string} fileName - Base name of the module, without extension.
 * @param {string} exportName - Identifier of the exported constant.
 * @param {object} data - Schedule data, serialised with JSON.stringify.
 * @returns {void}
 */
const writeGeneratedTsFile = (fileName, exportName, data) => {
  const targetPath = path.join(outDir, `${fileName}.ts`);
  const serialized = JSON.stringify(data, null, 2);

  const contents = `// Generated by scripts/generate-germany.mjs on ${generatedAt}.\n`
    + '// Do not edit this file manually.\n'
    + "import type { IFeeScheduleData } from '../feeschedules.types.js';\n"
    + '\n'
    + `export const ${exportName}: IFeeScheduleData = ${serialized};\n`;

  fs.writeFileSync(targetPath, contents);
};
|
||
|
|
|
||
|
|
/**
 * Download and extract the XML archive of one federal source.
 *
 * The archive is fetched from gesetze-im-internet.de, stored under the source
 * cache directory and unpacked with /usr/bin/unzip into a per-source
 * directory.
 *
 * @param {{ sourcePath: string }} source - Federal source descriptor.
 * @returns {Promise<string>} Path of the single extracted XML file.
 * @throws {Error} When the download fails, unzip cannot be started or exits
 *   with a non-zero status, or the archive does not yield exactly one XML
 *   file.
 */
const downloadFederalXml = async (source) => {
  const zipUrl = `https://www.gesetze-im-internet.de/${source.sourcePath}/xml.zip`;
  const zipPath = path.join(sourceRoot, `${source.sourcePath}.zip`);
  const extractDir = path.join(sourceRoot, source.sourcePath);

  fs.mkdirSync(extractDir, { recursive: true });

  const response = await fetch(zipUrl);
  if (!response.ok) {
    throw new Error(`Failed to download ${zipUrl}: ${response.status} ${response.statusText}`);
  }

  fs.writeFileSync(zipPath, Buffer.from(await response.arrayBuffer()));

  // Remove XML files left by an earlier run. Otherwise an archive whose XML
  // file was renamed would leave two files behind and fail the check below
  // on every subsequent run.
  for (const entry of fs.readdirSync(extractDir)) {
    if (entry.endsWith('.xml')) {
      fs.rmSync(path.join(extractDir, entry), { recursive: true, force: true });
    }
  }

  const unzip = spawnSync('/usr/bin/unzip', ['-o', '-q', zipPath, '-d', extractDir], {
    stdio: 'pipe',
    encoding: 'utf8',
  });

  // spawnSync reports a failed launch (e.g. missing binary) through `error`
  // with a null status and null output, so handle it before the exit status.
  if (unzip.error) {
    throw new Error(`Failed to run /usr/bin/unzip for ${zipPath}: ${unzip.error.message}`, {
      cause: unzip.error,
    });
  }

  if (unzip.status !== 0) {
    throw new Error(`Failed to unzip ${zipPath}: ${unzip.stderr || unzip.stdout}`);
  }

  const xmlFiles = fs.readdirSync(extractDir).filter((entry) => entry.endsWith('.xml'));
  if (xmlFiles.length !== 1) {
    throw new Error(`Expected one XML file for ${source.sourcePath}, found ${xmlFiles.length}`);
  }

  return path.join(extractDir, xmlFiles[0]);
};
|
||
|
|
|
||
|
|
/**
 * Read document-level metadata from the XML of the first norm.
 *
 * @param {string} firstNorm - XML text of the first `<norm>` element (may be
 *   an empty string).
 * @returns {{
 *   officialAbbreviation: (string|undefined),
 *   legalAbbreviation: (string|undefined),
 *   title: (string|undefined),
 *   issuedAt: (string|undefined),
 *   statusNotes: string[],
 * }} Values of the `amtabk`, `jurabk`, `langue` and `ausfertigung-datum`
 *   elements plus the cleaned text of every `standkommentar` element.
 */
const getMetadata = (firstNorm) => {
  const statusNotes = Array.from(
    firstNorm.matchAll(/<standkommentar>([\s\S]*?)<\/standkommentar>/g),
    ([, commentXml]) => cleanXmlText(commentXml),
  );

  const readField = (tag) => extractFirst(firstNorm, tag);

  return {
    officialAbbreviation: readField('amtabk'),
    legalAbbreviation: readField('jurabk'),
    title: readField('langue'),
    issuedAt: readField('ausfertigung-datum'),
    statusNotes,
  };
};
|
||
|
|
|
||
|
|
/**
 * Build one rule section per `<norm>` element that has a `doknr` attribute
 * and non-empty `<Content>` text.
 *
 * Section ids use the norm's position among all matched norms, so numbering
 * has gaps where norms without text were dropped.
 *
 * @param {string} xml - Full XML document.
 * @param {{ scheduleId: string }} source - Federal source descriptor.
 * @param {{ officialAbbreviation: (string|undefined) }} metadata - Document
 *   metadata; used as the fallback for the section reference.
 * @returns {object[]} Rule sections in document order.
 */
const buildRuleSections = (xml, source, metadata) => {
  const norms = [...xml.matchAll(/<norm\b[^>]*doknr="([^"]+)"[^>]*>([\s\S]*?)<\/norm>/g)];

  return norms
    .map(([, sourceNormId, normXml], index) => {
      const contentMatch = normXml.match(/<Content>([\s\S]*?)<\/Content>/);

      return {
        id: `${source.scheduleId}-section-${index + 1}`,
        scheduleId: source.scheduleId,
        sourceNormId,
        reference: extractFirst(normXml, 'enbez') || metadata.officialAbbreviation || source.scheduleId,
        title: extractFirst(normXml, 'titel'),
        text: contentMatch ? cleanXmlText(contentMatch[1]) : '',
      };
    })
    .filter((section) => section.text);
};

/**
 * Collect the table rows of the document that getRowType classifies as fee
 * data for the source's row mode.
 *
 * Empty cells are dropped before classification, so cell positions refer to
 * the non-empty cells of a row. Rows with fewer than two non-empty cells are
 * skipped.
 *
 * @param {string} xml - Full XML document.
 * @param {{ scheduleId: string, rowMode: string }} source - Federal source
 *   descriptor.
 * @returns {object[]} Fee rows in document order, numbered from 1.
 */
const buildFeeRows = (xml, source) => {
  const feeRows = [];

  for (const [rowXml] of xml.matchAll(/<row[\s\S]*?<\/row>/g)) {
    const cells = [...rowXml.matchAll(/<entry[^>]*>([\s\S]*?)<\/entry>/g)]
      .map(([, cellXml]) => cleanXmlText(cellXml))
      .filter(Boolean);

    if (cells.length < 2) {
      continue;
    }

    const rowType = getRowType(cells, source.rowMode);
    if (!rowType) {
      continue;
    }

    const isFeeEntry = rowType === 'fee-entry';
    const rowIndex = feeRows.length + 1;
    const thirdCell = cells[2];
    const lastCell = cells.at(-1) || '';

    // Points are taken from the third cell of a fee entry when it is a bare
    // integer.
    const points = isFeeEntry && thirdCell && /^\d+$/.test(thirdCell)
      ? Number(thirdCell)
      : undefined;

    // The last cell is read as an amount when it carries a euro sign, or when
    // a fee entry has at least four cells.
    const amountEur = lastCell.includes('€') || (isFeeEntry && cells.length >= 4)
      ? parseAmount(lastCell)
      : undefined;

    feeRows.push({
      id: `${source.scheduleId}-row-${rowIndex}`,
      scheduleId: source.scheduleId,
      rowType,
      rowIndex,
      code: isFeeEntry ? cells[0] : undefined,
      description: isFeeEntry ? cells[1] : undefined,
      cells,
      points,
      amountEur,
      sourceNormId: undefined,
    });
  }

  return feeRows;
};

/**
 * Download a federal source and convert its XML into schedule data.
 *
 * Metadata is read from the first `<norm>` element, rule sections from every
 * norm, and fee rows from the table rows of the whole document.
 *
 * @param {{ scheduleId: string, sourcePath: string, rowMode: string }} source
 *   Federal source descriptor.
 * @returns {Promise<object>} Schedule data with source details, fee rows,
 *   rule sections and notes. dataStatus is 'federal-law-fee-data' when at
 *   least one fee row was found, otherwise 'federal-law-rules-only'.
 * @throws {Error} Propagates download, extraction and file-read errors.
 */
const parseFederalSource = async (source) => {
  const xmlPath = await downloadFederalXml(source);
  const xml = fs.readFileSync(xmlPath, 'utf8');
  const sourceFileName = path.basename(xmlPath);

  // `\b` keeps the match anchored to <norm> itself, consistent with the
  // pattern used for the norm list, rather than any element whose name merely
  // starts with "norm".
  const firstNorm = xml.match(/<norm\b[\s\S]*?<\/norm>/)?.[0] ?? '';
  const metadata = getMetadata(firstNorm);

  const ruleSections = buildRuleSections(xml, source, metadata);
  const feeRows = buildFeeRows(xml, source);
  const hasFeeRows = feeRows.length > 0;

  return {
    scheduleId: source.scheduleId,
    dataStatus: hasFeeRows ? 'federal-law-fee-data' : 'federal-law-rules-only',
    edition: metadata.statusNotes.join(' ') || `Retrieved ${generatedAt}`,
    source: {
      name: 'Gesetze im Internet',
      url: `https://www.gesetze-im-internet.de/${source.sourcePath}/xml.zip`,
      pageUrl: `https://www.gesetze-im-internet.de/${source.sourcePath}/`,
      retrievedAt: generatedAt,
      sourceFileName,
      officialAbbreviation: metadata.officialAbbreviation,
      legalAbbreviation: metadata.legalAbbreviation,
      title: metadata.title,
      issuedAt: metadata.issuedAt,
      statusNotes: metadata.statusNotes,
    },
    feeRows,
    ruleSections,
    notes: hasFeeRows
      ? ['Generated from official federal XML table rows. Original row cells are preserved in order.']
      : ['No separate fee table rows were present in the federal XML. Fee rules are represented as rule sections.'],
  };
};
|
||
|
|
|
||
|
|
/**
 * Build the placeholder schedule data for an external source.
 *
 * No rows or rule sections are produced; the record only names the source and
 * carries its note.
 *
 * @param {{ scheduleId: string, sourceName: string, sourceUrl: string, note: string }} source
 *   External source descriptor.
 * @returns {object} Schedule data with dataStatus 'external-source-pending'
 *   and empty feeRows and ruleSections.
 */
const getExternalSourceData = (source) => {
  const { scheduleId, sourceName, sourceUrl, note } = source;

  const provenance = {
    name: sourceName,
    url: sourceUrl,
    retrievedAt: generatedAt,
  };

  return {
    scheduleId,
    dataStatus: 'external-source-pending',
    edition: `Source identified ${generatedAt}`,
    source: provenance,
    feeRows: [],
    ruleSections: [],
    notes: [note],
  };
};
|
||
|
|
|
||
|
|
// Delete previously generated modules so every run starts from a clean slate.
allSources.forEach(({ fileName }) => {
  const stalePath = path.join(outDir, `${fileName}.ts`);
  if (!fs.existsSync(stalePath)) {
    return;
  }
  fs.rmSync(stalePath);
});
|
||
|
|
|
||
|
|
// Every schedule written during this run, in output order; feeds index.ts.
const generatedData = [];

/**
 * Write one schedule module, remember it for the index, and log a summary.
 *
 * @param {object} source - Source descriptor (federal or external).
 * @param {object} data - Schedule data to write.
 * @param {number} rowCount - Row count shown in the log line.
 * @param {number} sectionCount - Section count shown in the log line.
 */
const emitSchedule = (source, data, rowCount, sectionCount) => {
  writeGeneratedTsFile(source.fileName, source.exportName, data);
  generatedData.push({ ...source, data });
  console.log(`${source.fileName}.ts: ${data.dataStatus}, rows=${rowCount}, sections=${sectionCount}`);
};

// Federal sources are downloaded and parsed one after another.
for (const federalSource of federalSources) {
  const parsed = await parseFederalSource(federalSource);
  emitSchedule(federalSource, parsed, parsed.feeRows.length, parsed.ruleSections.length);
}

// External sources only get placeholder records.
for (const externalSource of externalSources) {
  emitSchedule(externalSource, getExternalSourceData(externalSource), 0, 0);
}
|
||
|
|
|
||
|
|
// Collect the three per-schedule line groups of index.ts in a single pass.
const importLines = [];
const reExportLines = [];
const arrayMembers = [];
for (const { exportName, fileName } of generatedData) {
  importLines.push(`import { ${exportName} } from './${fileName}.js';`);
  reExportLines.push(`export { ${exportName} } from './${fileName}.js';`);
  arrayMembers.push(` ${exportName},`);
}

// Local imports: needed so the aggregate array below can reference each constant.
const indexImports = importLines.join('\n');

// Re-exports: make each schedule constant available from the index module.
const indexExports = reExportLines.join('\n');

// index.ts: banner, imports, re-exports, and one array listing every schedule.
const indexBody = [
  `// Generated by scripts/generate-germany.mjs on ${generatedAt}.`,
  '// Do not edit this file manually.',
  "import type { IFeeScheduleData } from '../feeschedules.types.js';",
  indexImports,
  '',
  indexExports,
  '',
  'export const GERMANY_FEE_SCHEDULE_DATA: IFeeScheduleData[] = [',
  ...arrayMembers,
  '];',
  '',
].join('\n');

fs.writeFileSync(path.join(outDir, 'index.ts'), indexBody);
|