import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';

/**
 * A Node.js Readable stream that generates sitemap XML incrementally.
 * Suitable for very large sitemaps (millions of URLs) that cannot be held in memory.
 *
 * Usage:
 *   const stream = new SitemapStream();
 *   stream.pipe(createWriteStream('sitemap.xml'));
 *   stream.pushUrl({ loc: 'https://example.com/' });
 *   stream.pushUrl({ loc: 'https://example.com/about' });
 *   stream.finish();
 */
export class SitemapStream extends plugins.Readable {
  /** Default namespace of the sitemap protocol. */
  private static readonly SITEMAP_NAMESPACE = 'http://www.sitemaps.org/schemas/sitemap/0.9';

  /**
   * Prefix/URI pairs of the extension namespaces this stream may emit.
   * All of them are declared in the header because the header is written
   * before the first URL, when later URLs (and the extensions they use)
   * are not known yet.
   */
  private static readonly EXTENSION_NAMESPACES: ReadonlyArray<readonly [string, string]> = [
    ['image', 'http://www.google.com/schemas/sitemap-image/1.1'],
    ['video', 'http://www.google.com/schemas/sitemap-video/1.1'],
    ['news', 'http://www.google.com/schemas/sitemap-news/0.9'],
    ['xhtml', 'http://www.w3.org/1999/xhtml'],
  ];

  private options: interfaces.ISitemapOptions;
  private urlCount = 0;
  private headerWritten = false;
  private finished = false;

  /**
   * @param options Sitemap options; `prettyPrint` defaults to `true` unless overridden.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    super({ encoding: 'utf-8' });
    this.options = {
      prettyPrint: true,
      ...options,
    };
  }

  /**
   * Push a URL entry into the stream.
   * The URL is immediately rendered to XML and pushed to the readable buffer.
   *
   * @param url The sitemap entry to render.
   * @returns The result of `Readable.push()`: `false` signals that the internal
   *          buffer is full and the caller should pause before pushing more.
   * @throws Error if called after `finish()`.
   */
  pushUrl(url: interfaces.ISitemapUrl): boolean {
    if (this.finished) {
      throw new Error('Cannot push URLs after calling finish()');
    }

    // Write header on first URL
    if (!this.headerWritten) {
      this.writeHeader();
    }

    const urlXml = this.renderUrl(url);
    this.urlCount++;
    return this.push(urlXml);
  }

  /**
   * Signal that no more URLs will be added.
   * Writes the closing tag and ends the stream. Calling it again is a no-op.
   */
  finish(): void {
    if (this.finished) return;
    this.finished = true;

    if (!this.headerWritten) {
      // Empty sitemap: still emit a well-formed document
      this.writeHeader();
    }

    this.push('</urlset>\n');
    this.push(null); // signal end of stream
  }

  /** Get the number of URLs written so far */
  get count(): number {
    return this.urlCount;
  }

  // Required by Readable
  _read(): void {
    // Data is pushed via pushUrl(), not pulled
  }

  /**
   * Render a single `<url>` element, including the image, news and
   * alternate-link extensions (simplified inline rendering for streaming).
   */
  private renderUrl(url: interfaces.ISitemapUrl): string {
    const pretty = this.options.prettyPrint !== false;
    const indent = pretty ? ' ' : '';
    const nl = pretty ? '\n' : '';
    const level2 = indent.repeat(2);
    const level3 = indent.repeat(3);
    const level4 = indent.repeat(4);

    const lines: string[] = [];
    lines.push(`${indent}<url>`);
    lines.push(`${level2}<loc>${XmlRenderer.escapeXml(url.loc)}</loc>`);

    if (url.lastmod != null) {
      lines.push(`${level2}<lastmod>${XmlRenderer.formatDate(url.lastmod)}</lastmod>`);
    }

    const changefreq = url.changefreq ?? this.options.defaultChangeFreq;
    if (changefreq) {
      lines.push(`${level2}<changefreq>${changefreq}</changefreq>`);
    }

    // `!= null` rather than truthiness: a priority of 0 is a valid value
    const priority = url.priority ?? this.options.defaultPriority;
    if (priority != null) {
      lines.push(`${level2}<priority>${priority.toFixed(1)}</priority>`);
    }

    if (url.images) {
      for (const img of url.images) {
        lines.push(`${level2}<image:image>`);
        lines.push(`${level3}<image:loc>${XmlRenderer.escapeXml(img.loc)}</image:loc>`);
        if (img.caption) {
          lines.push(`${level3}<image:caption>${XmlRenderer.escapeXml(img.caption)}</image:caption>`);
        }
        if (img.title) {
          lines.push(`${level3}<image:title>${XmlRenderer.escapeXml(img.title)}</image:title>`);
        }
        lines.push(`${level2}</image:image>`);
      }
    }

    if (url.news) {
      const news = url.news;
      lines.push(`${level2}<news:news>`);
      lines.push(`${level3}<news:publication>`);
      lines.push(`${level4}<news:name>${XmlRenderer.escapeXml(news.publication.name)}</news:name>`);
      lines.push(`${level4}<news:language>${XmlRenderer.escapeXml(news.publication.language)}</news:language>`);
      lines.push(`${level3}</news:publication>`);
      lines.push(
        `${level3}<news:publication_date>${XmlRenderer.formatDate(news.publicationDate)}</news:publication_date>`,
      );
      lines.push(`${level3}<news:title>${XmlRenderer.escapeXml(news.title)}</news:title>`);
      if (news.keywords) {
        const kw = Array.isArray(news.keywords) ? news.keywords.join(', ') : news.keywords;
        lines.push(`${level3}<news:keywords>${XmlRenderer.escapeXml(kw)}</news:keywords>`);
      }
      lines.push(`${level2}</news:news>`);
    }

    if (url.alternates) {
      for (const alt of url.alternates) {
        // NOTE(review): assumes alternate entries expose `hreflang` and `href`
        // — confirm against the ISitemapUrl alternate-link interface.
        lines.push(
          `${level2}<xhtml:link rel="alternate" hreflang="${XmlRenderer.escapeXml(alt.hreflang)}" href="${XmlRenderer.escapeXml(alt.href)}" />`,
        );
      }
    }

    lines.push(`${indent}</url>`);

    // Every line, including the last, is terminated by the newline token
    return lines.map((line) => `${line}${nl}`).join('');
  }

  /**
   * Write the XML declaration, the optional XSL stylesheet instruction and the
   * opening urlset tag. All supported extension namespaces are declared,
   * since the header is emitted before the full set of URLs is known.
   */
  private writeHeader(): void {
    this.headerWritten = true;
    const nl = this.options.prettyPrint !== false ? '\n' : '';

    let header = `<?xml version="1.0" encoding="UTF-8"?>${nl}`;

    if (this.options.xslUrl) {
      header += `<?xml-stylesheet type="text/xsl" href="${XmlRenderer.escapeXml(this.options.xslUrl)}"?>${nl}`;
    }

    const extensionAttrs = SitemapStream.EXTENSION_NAMESPACES
      .map(([prefix, uri]) => ` xmlns:${prefix}="${uri}"`)
      .join('');
    header += `<urlset xmlns="${SitemapStream.SITEMAP_NAMESPACE}"${extensionAttrs}>${nl}`;

    this.push(header);
  }
}