initial
This commit is contained in:
@@ -0,0 +1,351 @@
|
||||
import { spawnSync } from 'node:child_process';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Package root: the parent of the directory that contains this script.
const packageRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
// Working directory for downloaded archives and their extracted contents.
const sourceRoot = path.join(packageRoot, '.nogit', 'fee-schedules-sources');
// Directory that receives the generated TypeScript modules.
const outDir = path.join(packageRoot, 'ts', 'germany');
// Generation date as YYYY-MM-DD, taken from the UTC ISO timestamp.
const generatedAt = new Date().toISOString().slice(0, 10);
|
||||
|
||||
/**
 * Fee schedules that are downloaded as XML archives from gesetze-im-internet.de.
 *
 * - `scheduleId`: identifier written into the generated data.
 * - `fileName`:   base name of the generated `.ts` module.
 * - `exportName`: name of the constant exported from that module.
 * - `sourcePath`: path segment used to build the archive URL and the local cache paths.
 * - `rowMode`:    row classification mode handed to `getRowType` ('code', 'table' or 'none').
 */
const federalSources = [
  { scheduleId: 'de-goae', fileName: 'goae', exportName: 'GERMANY_GOAE_FEE_SCHEDULE_DATA', sourcePath: 'go__1982', rowMode: 'code' },
  { scheduleId: 'de-goz', fileName: 'goz', exportName: 'GERMANY_GOZ_FEE_SCHEDULE_DATA', sourcePath: 'goz_1987', rowMode: 'code' },
  { scheduleId: 'de-gop', fileName: 'gop', exportName: 'GERMANY_GOP_FEE_SCHEDULE_DATA', sourcePath: 'gop', rowMode: 'none' },
  { scheduleId: 'de-got', fileName: 'got', exportName: 'GERMANY_GOT_FEE_SCHEDULE_DATA', sourcePath: 'got_2022', rowMode: 'code' },
  { scheduleId: 'de-stbvv', fileName: 'stbvv', exportName: 'GERMANY_STBVV_FEE_SCHEDULE_DATA', sourcePath: 'stbgebv', rowMode: 'table' },
  { scheduleId: 'de-rvg', fileName: 'rvg', exportName: 'GERMANY_RVG_FEE_SCHEDULE_DATA', sourcePath: 'rvg', rowMode: 'code' },
  { scheduleId: 'de-gnotkg', fileName: 'gnotkg', exportName: 'GERMANY_GNOTKG_FEE_SCHEDULE_DATA', sourcePath: 'gnotkg', rowMode: 'code' },
  { scheduleId: 'de-gkg', fileName: 'gkg', exportName: 'GERMANY_GKG_FEE_SCHEDULE_DATA', sourcePath: 'gkg_2004', rowMode: 'code' },
  { scheduleId: 'de-famgkg', fileName: 'famgkg', exportName: 'GERMANY_FAMGKG_FEE_SCHEDULE_DATA', sourcePath: 'famgkg', rowMode: 'code' },
  { scheduleId: 'de-gvkostg', fileName: 'gvkostg', exportName: 'GERMANY_GVKOSTG_FEE_SCHEDULE_DATA', sourcePath: 'gvkostg', rowMode: 'code' },
  { scheduleId: 'de-jveg', fileName: 'jveg', exportName: 'GERMANY_JVEG_FEE_SCHEDULE_DATA', sourcePath: 'jveg', rowMode: 'code' },
  { scheduleId: 'de-insvv', fileName: 'insvv', exportName: 'GERMANY_INSVV_FEE_SCHEDULE_DATA', sourcePath: 'insvv', rowMode: 'none' },
  { scheduleId: 'de-hoai', fileName: 'hoai', exportName: 'GERMANY_HOAI_FEE_SCHEDULE_DATA', sourcePath: 'hoai_2013', rowMode: 'table' },
  { scheduleId: 'de-ampreisv', fileName: 'ampreisv', exportName: 'GERMANY_AMPREISV_FEE_SCHEDULE_DATA', sourcePath: 'ampreisv', rowMode: 'none' },
];
|
||||
|
||||
/**
 * Fee schedules that are not downloaded by this script. Each entry only records where
 * the schedule is published; `getExternalSourceData` turns it into a placeholder data set
 * with no fee rows and no rule sections.
 */
const externalSources = [
  {
    scheduleId: 'de-ebm',
    fileName: 'ebm',
    exportName: 'GERMANY_EBM_FEE_SCHEDULE_DATA',
    sourceName: 'Kassenärztliche Bundesvereinigung EBM',
    sourceUrl: 'https://www.kbv.de/html/ebm.php',
    note: 'External self-administration source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-bema',
    fileName: 'bema',
    exportName: 'GERMANY_BEMA_FEE_SCHEDULE_DATA',
    sourceName: 'Kassenzahnärztliche Bundesvereinigung BEMA',
    sourceUrl: 'https://www.kzbv.de/bema',
    note: 'External dental self-administration source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-ag-drg-fpv',
    fileName: 'agdrgfpv',
    exportName: 'GERMANY_AGDRGFPV_FEE_SCHEDULE_DATA',
    sourceName: 'InEK aG-DRG Fallpauschalen-Katalog / FPV',
    sourceUrl: 'https://www.g-drg.de',
    note: 'Year-specific InEK catalog source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-uv-goae',
    fileName: 'uvgoae',
    exportName: 'GERMANY_UVGOAE_FEE_SCHEDULE_DATA',
    sourceName: 'Deutsche Gesetzliche Unfallversicherung UV-GOÄ',
    sourceUrl: 'https://www.dguv.de',
    note: 'External accident insurance fee schedule source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-heilmittel-gkv',
    fileName: 'heilmittelgkv',
    exportName: 'GERMANY_HEILMITTELGKV_FEE_SCHEDULE_DATA',
    sourceName: 'GKV-Spitzenverband Heilmittel contracts and price lists',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/heilmittel/heilmittel.jsp',
    note: 'External contract and price list source by remedy area; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-hebammenhilfevertrag',
    fileName: 'hebammenhilfevertrag',
    exportName: 'GERMANY_HEBAMMENHILFEVERTRAG_FEE_SCHEDULE_DATA',
    sourceName: 'GKV-Spitzenverband Hebammenhilfevertrag',
    sourceUrl: 'https://www.gkv-spitzenverband.de/krankenversicherung/ambulante_leistungen/hebammen_geburtshaeuser/hebammenhilfevertrag/hebammenhilfevertrag.jsp',
    note: 'External contract and remuneration agreement source; not part of gesetze-im-internet federal XML corpus.',
  },
  {
    scheduleId: 'de-gebueh',
    fileName: 'gebueh',
    exportName: 'GERMANY_GEBUEH_FEE_SCHEDULE_DATA',
    sourceName: 'Gebührenverzeichnis für Heilpraktiker',
    sourceUrl: 'https://www.gebueh.de',
    note: 'Non-official customary fee directory; not part of gesetze-im-internet federal XML corpus.',
  },
];
|
||||
|
||||
// Every schedule this script writes a module for: federal sources first, then external ones.
const allSources = [...federalSources, ...externalSources];

// Make sure the download cache and the output directory exist before anything is written.
fs.mkdirSync(sourceRoot, { recursive: true });
fs.mkdirSync(outDir, { recursive: true });
|
||||
|
||||
/**
 * Decodes the character references this script expects in the source XML.
 *
 * Supported: the named entities listed in `namedEntities` and decimal/hexadecimal numeric
 * references. A non-breaking space (`&nbsp;`, `&#160;`, `&#xA0;`) becomes a regular space.
 * Unknown named entities and out-of-range numeric references are left untouched.
 *
 * The input is decoded in a single pass, so text that is itself the result of decoding is
 * never decoded again (`&amp;lt;` yields `&lt;`, not `<`).
 *
 * @param {string} input - Text that may contain character references.
 * @returns {string} The text with the supported references replaced.
 */
const decodeEntities = (input) => {
  const namedEntities = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
    sect: '§',
    Auml: 'Ä',
    Ouml: 'Ö',
    Uuml: 'Ü',
    auml: 'ä',
    ouml: 'ö',
    uuml: 'ü',
    szlig: 'ß',
  };

  return input.replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z]+);/g, (reference, body) => {
    if (body[0] !== '#') {
      return Object.hasOwn(namedEntities, body) ? namedEntities[body] : reference;
    }

    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);

    if (codePoint === 0xa0) {
      // Same mapping as the named `nbsp` entity: a plain space.
      return ' ';
    }

    // String.fromCodePoint throws a RangeError outside this range, so keep the raw text instead.
    if (!(codePoint > 0 && codePoint <= 0x10ffff)) {
      return reference;
    }

    return String.fromCodePoint(codePoint);
  });
};
|
||||
|
||||
/**
 * Reduces an XML fragment to plain, single-spaced text.
 *
 * Markup is removed before entities are decoded. Decoding first would turn escaped text
 * such as `&lt;x&gt;` into `<x>`, which the tag-stripping step would then delete.
 *
 * @param {string} input - XML fragment.
 * @returns {string} Text content with tags removed, entities decoded, whitespace runs
 *   collapsed to one space, and leading/trailing whitespace trimmed.
 */
const cleanXmlText = (input) => {
  const withoutMarkup = input
    .replace(/<BR\s*\/?\s*>/gi, ' ')
    .replace(/<[^>]+>/g, ' ');

  return decodeEntities(withoutMarkup)
    .replace(/\s+/g, ' ')
    .trim();
};
|
||||
|
||||
/**
 * Returns the cleaned text content of the first `<tag>…</tag>` element found in `xml`.
 *
 * @param {string} xml - XML text to search.
 * @param {string} tag - Element name; it is matched literally (regex metacharacters are escaped).
 * @returns {string | undefined} The element's text after `cleanXmlText`, or `undefined`
 *   when no such element is found.
 */
const extractFirst = (xml, tag) => {
  // Escape the tag so a name containing regex metacharacters cannot change the pattern.
  const escapedTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const match = xml.match(new RegExp(`<${escapedTag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${escapedTag}>`));

  return match ? cleanXmlText(match[1]) : undefined;
};
|
||||
|
||||
// True when `value` has one of the accepted code shapes: 1-6 digits with an optional
// lowercase letter suffix, or 1-3 uppercase letters (optionally followed by one whitespace
// character) and 1-5 digits.
const isCode = (value) => {
  const codePattern = /^(?:\d{1,6}[a-z]?|[A-Z]\d{1,5}|[A-Z]{1,3}\s?\d{1,5})$/;
  return codePattern.test(value);
};
|
||||
// True when `value` contains at least one digit and consists only of digits, whitespace,
// '.', ',', '€', en dash and hyphen.
const hasNumericValue = (value) => {
  if (!/\d/.test(value)) {
    return false;
  }
  return /^[\d\s.,€–\-]+$/.test(value);
};
|
||||
|
||||
/**
 * Parses the last number in `value`, written with '.' as thousands separator and ',' as
 * decimal separator (for example "1.234,56 €").
 *
 * @param {string} value - Cell text to scan.
 * @returns {number | undefined} The parsed number, or `undefined` when `value` contains no digits.
 */
const parseAmount = (value) => {
  // Digits, optional ".ddd" groups, optional ",dd…" fraction. The second alternative the
  // pattern used to carry could never match where this one does not, so it was dropped.
  const numbers = value.match(/\d+(?:\.\d{3})*(?:,\d+)?/g);
  if (!numbers) {
    return undefined;
  }

  // When a cell holds several numbers, the last one is used.
  const numeric = numbers.at(-1);

  return Number(numeric.replace(/\./g, '').replace(',', '.'));
};
|
||||
|
||||
/**
 * Classifies a table row according to the source's row mode.
 *
 * @param {string[]} cells - Cleaned cell texts of the row.
 * @param {string} mode - Row mode of the source ('code', 'table', or anything else).
 * @returns {'fee-entry' | 'fee-table-row' | undefined}
 *   'fee-entry' in 'code' mode when the first cell passes `isCode`;
 *   'fee-table-row' in 'table' mode when at least two cells pass `hasNumericValue`;
 *   `undefined` otherwise (including every row of any other mode).
 */
const getRowType = (cells, mode) => {
  if (mode === 'code' && isCode(cells[0])) {
    return 'fee-entry';
  }

  if (mode === 'table' && cells.length >= 2 && cells.filter(hasNumericValue).length >= 2) {
    return 'fee-table-row';
  }

  return undefined;
};
|
||||
|
||||
/**
 * Writes `<outDir>/<fileName>.ts`, a module exporting `data` as a typed constant.
 *
 * The data is serialized with `JSON.stringify`, so properties whose value is `undefined`
 * do not appear in the generated file.
 *
 * @param {string} fileName - Base name of the module, without extension.
 * @param {string} exportName - Name of the exported constant.
 * @param {object} data - Schedule data to embed.
 * @returns {void}
 */
const writeGeneratedTsFile = (fileName, exportName, data) => {
  const filePath = path.join(outDir, `${fileName}.ts`);
  const body = [
    `// Generated by scripts/generate-germany.mjs on ${generatedAt}.`,
    '// Do not edit this file manually.',
    "import type { IFeeScheduleData } from '../feeschedules.types.js';",
    '',
    `export const ${exportName}: IFeeScheduleData = ${JSON.stringify(data, null, 2)};`,
    '',
  ].join('\n');

  fs.writeFileSync(filePath, body);
};
|
||||
|
||||
/**
 * Downloads the XML archive of a federal source, extracts it below `sourceRoot`, and
 * returns the path of the single XML file it contains.
 *
 * @param {{ sourcePath: string }} source - Entry of `federalSources`.
 * @returns {Promise<string>} Path of the extracted XML file.
 * @throws {Error} When the download fails, unzip cannot be started or exits non-zero,
 *   or the extraction directory does not contain exactly one `.xml` file.
 */
const downloadFederalXml = async (source) => {
  const zipUrl = `https://www.gesetze-im-internet.de/${source.sourcePath}/xml.zip`;
  const zipPath = path.join(sourceRoot, `${source.sourcePath}.zip`);
  const extractDir = path.join(sourceRoot, source.sourcePath);
  const isXmlFile = (entry) => entry.endsWith('.xml');

  fs.mkdirSync(extractDir, { recursive: true });

  const response = await fetch(zipUrl);
  if (!response.ok) {
    throw new Error(`Failed to download ${zipUrl}: ${response.status} ${response.statusText}`);
  }

  fs.writeFileSync(zipPath, Buffer.from(await response.arrayBuffer()));

  // XML left over from an earlier run (for example under a different file name) would make
  // the "exactly one XML file" check below fail, so drop it before extracting.
  for (const staleEntry of fs.readdirSync(extractDir).filter(isXmlFile)) {
    fs.rmSync(path.join(extractDir, staleEntry), { recursive: true, force: true });
  }

  const unzip = spawnSync('/usr/bin/unzip', ['-o', '-q', zipPath, '-d', extractDir], {
    stdio: 'pipe',
    encoding: 'utf8',
  });

  // When the process cannot be spawned at all, `status` is null and the reason is in `error`.
  if (unzip.error) {
    throw new Error(`Failed to unzip ${zipPath}: ${unzip.error.message}`, { cause: unzip.error });
  }

  if (unzip.status !== 0) {
    throw new Error(`Failed to unzip ${zipPath}: ${unzip.stderr || unzip.stdout}`);
  }

  const xmlFiles = fs.readdirSync(extractDir).filter(isXmlFile);
  if (xmlFiles.length !== 1) {
    throw new Error(`Expected one XML file for ${source.sourcePath}, found ${xmlFiles.length}`);
  }

  return path.join(extractDir, xmlFiles[0]);
};
|
||||
|
||||
/**
 * Collects document-level metadata from the XML of a `<norm>` element.
 *
 * @param {string} firstNorm - XML text of the norm to read (may be empty).
 * @returns {{
 *   officialAbbreviation: string | undefined,
 *   legalAbbreviation: string | undefined,
 *   title: string | undefined,
 *   issuedAt: string | undefined,
 *   statusNotes: string[],
 * }} Cleaned text of the first `amtabk`, `jurabk`, `langue` and `ausfertigung-datum`
 *   elements, plus the cleaned text of every `standkommentar` element.
 */
const getMetadata = (firstNorm) => {
  const statusNotes = [...firstNorm.matchAll(/<standkommentar>([\s\S]*?)<\/standkommentar>/g)]
    .map((match) => cleanXmlText(match[1]));

  return {
    officialAbbreviation: extractFirst(firstNorm, 'amtabk'),
    legalAbbreviation: extractFirst(firstNorm, 'jurabk'),
    title: extractFirst(firstNorm, 'langue'),
    issuedAt: extractFirst(firstNorm, 'ausfertigung-datum'),
    statusNotes,
  };
};
|
||||
|
||||
/**
 * Downloads one federal source and converts its XML into the schedule data structure.
 *
 * - Metadata is read from the first `<norm>` element of the document.
 * - Every `<norm>` with a `doknr` attribute and non-empty `<Content>` text becomes a rule section.
 * - Every `<row>` with at least two non-empty cells that `getRowType` accepts for the
 *   source's `rowMode` becomes a fee row.
 *
 * @param {{ scheduleId: string, sourcePath: string, rowMode: string }} source - Entry of `federalSources`.
 * @returns {Promise<object>} Schedule data with `feeRows`, `ruleSections`, source details and notes.
 * @throws {Error} Whatever `downloadFederalXml` or reading the extracted file throws.
 */
const parseFederalSource = async (source) => {
  const xmlPath = await downloadFederalXml(source);
  const xml = fs.readFileSync(xmlPath, 'utf8');
  const sourceFileName = path.basename(xmlPath);

  // `\b` keeps these patterns from matching elements whose names merely start with
  // "norm", "row" or "entry" (for example `<entrytbl>`).
  const firstNorm = xml.match(/<norm\b[\s\S]*?<\/norm>/)?.[0] ?? '';
  const metadata = getMetadata(firstNorm);
  const norms = [...xml.matchAll(/<norm\b[^>]*doknr="([^"]+)"[^>]*>([\s\S]*?)<\/norm>/g)];

  const ruleSections = norms
    .map((match, index) => {
      const sourceNormId = match[1];
      const normXml = match[2];
      const reference = extractFirst(normXml, 'enbez') || metadata.officialAbbreviation || source.scheduleId;
      const title = extractFirst(normXml, 'titel');
      const contentMatch = normXml.match(/<Content>([\s\S]*?)<\/Content>/);
      const text = contentMatch ? cleanXmlText(contentMatch[1]) : '';

      return {
        // Numbered by position among all norms, so ids stay stable even though
        // text-less norms are filtered out below.
        id: `${source.scheduleId}-section-${index + 1}`,
        scheduleId: source.scheduleId,
        sourceNormId,
        reference,
        title,
        text,
      };
    })
    .filter((section) => section.text);

  const feeRows = [];
  let rowIndex = 0;

  for (const rowMatch of xml.matchAll(/<row\b[\s\S]*?<\/row>/g)) {
    // Empty cells are dropped, so positions in `cells` refer to non-empty cells only.
    const cells = [...rowMatch[0].matchAll(/<entry\b[^>]*>([\s\S]*?)<\/entry>/g)]
      .map((entryMatch) => cleanXmlText(entryMatch[1]))
      .filter(Boolean);

    if (cells.length < 2) {
      continue;
    }

    const rowType = getRowType(cells, source.rowMode);
    if (!rowType) {
      continue;
    }

    rowIndex += 1;

    const thirdCell = cells[2];
    const lastCell = cells.at(-1) || '';

    // Points are taken from the third cell of a fee entry when it is a plain integer.
    const points = rowType === 'fee-entry' && thirdCell && /^\d+$/.test(thirdCell)
      ? Number(thirdCell)
      : undefined;

    // An amount is parsed from the last cell when it carries a euro sign, or when a fee
    // entry has at least four cells.
    const amountEur = lastCell.includes('€') || (rowType === 'fee-entry' && cells.length >= 4)
      ? parseAmount(lastCell)
      : undefined;

    feeRows.push({
      id: `${source.scheduleId}-row-${rowIndex}`,
      scheduleId: source.scheduleId,
      rowType,
      rowIndex,
      code: rowType === 'fee-entry' ? cells[0] : undefined,
      description: rowType === 'fee-entry' ? cells[1] : undefined,
      cells,
      points,
      amountEur,
      // NOTE(review): rows are matched across the whole document, so the enclosing norm
      // is not known here and this is always undefined.
      sourceNormId: undefined,
    });
  }

  return {
    scheduleId: source.scheduleId,
    dataStatus: feeRows.length ? 'federal-law-fee-data' : 'federal-law-rules-only',
    edition: metadata.statusNotes.join(' ') || `Retrieved ${generatedAt}`,
    source: {
      name: 'Gesetze im Internet',
      url: `https://www.gesetze-im-internet.de/${source.sourcePath}/xml.zip`,
      pageUrl: `https://www.gesetze-im-internet.de/${source.sourcePath}/`,
      retrievedAt: generatedAt,
      sourceFileName,
      officialAbbreviation: metadata.officialAbbreviation,
      legalAbbreviation: metadata.legalAbbreviation,
      title: metadata.title,
      issuedAt: metadata.issuedAt,
      statusNotes: metadata.statusNotes,
    },
    feeRows,
    ruleSections,
    notes: feeRows.length
      ? ['Generated from official federal XML table rows. Original row cells are preserved in order.']
      : ['No separate fee table rows were present in the federal XML. Fee rules are represented as rule sections.'],
  };
};
|
||||
|
||||
/**
 * Builds the placeholder data set for an external source: no fee rows, no rule sections,
 * only the source reference and its note.
 *
 * @param {{ scheduleId: string, sourceName: string, sourceUrl: string, note: string }} source -
 *   Entry of `externalSources`.
 * @returns {object} Schedule data with `dataStatus` set to 'external-source-pending'.
 */
const getExternalSourceData = (source) => ({
  scheduleId: source.scheduleId,
  dataStatus: 'external-source-pending',
  edition: `Source identified ${generatedAt}`,
  source: {
    name: source.sourceName,
    url: source.sourceUrl,
    retrievedAt: generatedAt,
  },
  feeRows: [],
  ruleSections: [],
  notes: [source.note],
});
|
||||
|
||||
// Remove previously generated modules so each run starts from a clean output set.
// `force: true` makes a missing file a no-op, replacing the separate existence check.
for (const source of allSources) {
  fs.rmSync(path.join(outDir, `${source.fileName}.ts`), { force: true });
}
|
||||
|
||||
// Source entries paired with their generated data, in the order the modules are written.
const generatedData = [];

// Federal sources are processed one after another: download, parse, write, log.
for (const source of federalSources) {
  const data = await parseFederalSource(source);
  writeGeneratedTsFile(source.fileName, source.exportName, data);
  generatedData.push({ ...source, data });
  console.log(`${source.fileName}.ts: ${data.dataStatus}, rows=${data.feeRows.length}, sections=${data.ruleSections.length}`);
}

// External sources only get a placeholder module; nothing is downloaded for them.
for (const source of externalSources) {
  const data = getExternalSourceData(source);
  writeGeneratedTsFile(source.fileName, source.exportName, data);
  generatedData.push({ ...source, data });
  console.log(`${source.fileName}.ts: ${data.dataStatus}, rows=0, sections=0`);
}
|
||||
|
||||
// Import lines bring each generated constant into scope for the aggregate array below.
const indexImports = generatedData
  .map((source) => `import { ${source.exportName} } from './${source.fileName}.js';`)
  .join('\n');

// Re-export lines expose each generated constant from the index module.
const indexExports = generatedData
  .map((source) => `export { ${source.exportName} } from './${source.fileName}.js';`)
  .join('\n');

// Full text of index.ts: header, imports, re-exports, and one array listing every schedule.
const indexBody = [
  `// Generated by scripts/generate-germany.mjs on ${generatedAt}.`,
  '// Do not edit this file manually.',
  "import type { IFeeScheduleData } from '../feeschedules.types.js';",
  indexImports,
  '',
  indexExports,
  '',
  'export const GERMANY_FEE_SCHEDULE_DATA: IFeeScheduleData[] = [',
  ...generatedData.map((source) => `  ${source.exportName},`),
  '];',
  '',
].join('\n');

fs.writeFileSync(path.join(outDir, 'index.ts'), indexBody);
|
||||
Reference in New Issue
Block a user