169 lines
6.0 KiB
TypeScript
169 lines
6.0 KiB
TypeScript
import * as plugins from './smartsitemap.plugins.js';
|
|
import type * as interfaces from './interfaces/index.js';
|
|
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
|
|
|
|
/**
 * A Node.js Readable stream that generates sitemap XML incrementally.
 * Suitable for very large sitemaps (millions of URLs) that cannot be held in memory.
 *
 * The XML header and opening `<urlset>` tag are emitted lazily, on the first
 * `pushUrl()` call (or on `finish()` for an empty sitemap). Extension
 * namespaces required by that first URL are declared on `<urlset>`; a namespace
 * first needed by a later URL is declared on that entry's own `<url>` element,
 * so the document stays namespace-well-formed no matter in which order
 * extension data shows up.
 *
 * NOTE: `videos` on a URL only causes the video namespace to be declared in
 * the header (when present on the first URL); this streaming renderer does not
 * emit any `<video:*>` elements.
 *
 * Usage:
 *   const stream = new SitemapStream();
 *   stream.pipe(createWriteStream('sitemap.xml'));
 *   stream.pushUrl({ loc: 'https://example.com/' });
 *   stream.pushUrl({ loc: 'https://example.com/about' });
 *   stream.finish();
 */
export class SitemapStream extends plugins.Readable {
  /** Extension namespace prefixes and their URIs, in header declaration order. */
  private static readonly NAMESPACE_URIS: ReadonlyArray<readonly [string, string]> = [
    ['image', 'http://www.google.com/schemas/sitemap-image/1.1'],
    ['video', 'http://www.google.com/schemas/sitemap-video/1.1'],
    ['news', 'http://www.google.com/schemas/sitemap-news/0.9'],
    ['xhtml', 'http://www.w3.org/1999/xhtml'],
  ];

  private options: interfaces.ISitemapOptions;
  private urlCount = 0;
  private headerWritten = false;
  private finished = false;
  /** Namespace prefixes declared on `<urlset>`; fixed once the header has been written. */
  private namespaces: Set<string> = new Set();

  /**
   * @param options Rendering options; `prettyPrint` defaults to `true` unless overridden.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    super({ encoding: 'utf-8' });
    this.options = {
      prettyPrint: true,
      ...options,
    };
  }

  /**
   * Push a URL entry into the stream.
   * The URL is immediately rendered to XML and pushed to the readable buffer.
   *
   * @param url The sitemap entry to render.
   * @returns The result of the underlying `push()` call for the URL chunk.
   * @throws Error if called after `finish()`.
   */
  pushUrl(url: interfaces.ISitemapUrl): boolean {
    if (this.finished) {
      throw new Error('Cannot push URLs after calling finish()');
    }

    // Work out which extension namespaces this particular entry relies on.
    const required: string[] = [];
    if (url.images?.length) required.push('image');
    if (url.videos?.length) required.push('video');
    if (url.news) required.push('news');
    if (url.alternates?.length) required.push('xhtml');

    // The header goes out with the first URL and carries that URL's namespaces.
    if (!this.headerWritten) {
      for (const prefix of required) this.namespaces.add(prefix);
      this.writeHeader();
    }

    // Prefixes missing from the already-emitted header must be declared on the
    // <url> element itself, otherwise the prefixed children would be unbound.
    // 'video' is skipped because no video-prefixed elements are rendered.
    const lateNamespaces = required.filter(
      (prefix) => prefix !== 'video' && !this.namespaces.has(prefix),
    );

    const urlXml = this.renderUrl(url, lateNamespaces);

    this.urlCount++;
    return this.push(urlXml);
  }

  /**
   * Signal that no more URLs will be added.
   * Writes the closing tag and ends the stream. Calling it again is a no-op.
   */
  finish(): void {
    if (this.finished) return;
    this.finished = true;

    if (!this.headerWritten) {
      // Empty sitemap: still emit a complete document.
      this.writeHeader();
    }

    this.push('</urlset>\n');
    this.push(null); // signal end of stream
  }

  /** Get the number of URLs written so far */
  get count(): number {
    return this.urlCount;
  }

  // Required by Readable
  _read(): void {
    // Data is pushed via pushUrl(), not pulled
  }

  /**
   * Render a single `<url>` element, including image, news and alternate-link
   * extensions (simplified inline rendering for streaming).
   *
   * @param url The entry to render.
   * @param lateNamespaces Prefixes that are not declared on `<urlset>` and
   *   therefore need an `xmlns:` declaration on this `<url>` element.
   * @returns The XML fragment for the entry.
   */
  private renderUrl(url: interfaces.ISitemapUrl, lateNamespaces: readonly string[]): string {
    const pretty = this.options.prettyPrint !== false;
    const indent = pretty ? ' ' : '';
    const nl = pretty ? '\n' : '';
    const i2 = indent.repeat(2);
    const i3 = indent.repeat(3);
    const i4 = indent.repeat(4);

    const nsAttrs = SitemapStream.NAMESPACE_URIS
      .filter(([prefix]) => lateNamespaces.includes(prefix))
      .map(([prefix, uri]) => ` xmlns:${prefix}="${uri}"`)
      .join('');

    let xml = `${indent}<url${nsAttrs}>${nl}`;
    xml += `${i2}<loc>${XmlRenderer.escapeXml(url.loc)}</loc>${nl}`;

    if (url.lastmod != null) {
      xml += `${i2}<lastmod>${XmlRenderer.formatDate(url.lastmod)}</lastmod>${nl}`;
    }

    const changefreq = url.changefreq ?? this.options.defaultChangeFreq;
    if (changefreq) {
      xml += `${i2}<changefreq>${XmlRenderer.escapeXml(changefreq)}</changefreq>${nl}`;
    }

    const priority = url.priority ?? this.options.defaultPriority;
    if (priority != null) {
      xml += `${i2}<priority>${priority.toFixed(1)}</priority>${nl}`;
    }

    if (url.images) {
      for (const img of url.images) {
        xml += `${i2}<image:image>${nl}`;
        xml += `${i3}<image:loc>${XmlRenderer.escapeXml(img.loc)}</image:loc>${nl}`;
        if (img.caption) {
          xml += `${i3}<image:caption>${XmlRenderer.escapeXml(img.caption)}</image:caption>${nl}`;
        }
        if (img.title) {
          xml += `${i3}<image:title>${XmlRenderer.escapeXml(img.title)}</image:title>${nl}`;
        }
        xml += `${i2}</image:image>${nl}`;
      }
    }

    if (url.news) {
      const news = url.news;
      xml += `${i2}<news:news>${nl}`;
      xml += `${i3}<news:publication>${nl}`;
      xml += `${i4}<news:name>${XmlRenderer.escapeXml(news.publication.name)}</news:name>${nl}`;
      xml += `${i4}<news:language>${XmlRenderer.escapeXml(news.publication.language)}</news:language>${nl}`;
      xml += `${i3}</news:publication>${nl}`;
      xml += `${i3}<news:publication_date>${XmlRenderer.formatDate(news.publicationDate)}</news:publication_date>${nl}`;
      xml += `${i3}<news:title>${XmlRenderer.escapeXml(news.title)}</news:title>${nl}`;
      if (news.keywords) {
        const kw = Array.isArray(news.keywords) ? news.keywords.join(', ') : news.keywords;
        xml += `${i3}<news:keywords>${XmlRenderer.escapeXml(kw)}</news:keywords>${nl}`;
      }
      xml += `${i2}</news:news>${nl}`;
    }

    if (url.alternates) {
      for (const alt of url.alternates) {
        // Both values land inside attribute quotes, so both must be escaped.
        xml += `${i2}<xhtml:link rel="alternate" hreflang="${XmlRenderer.escapeXml(alt.hreflang)}" href="${XmlRenderer.escapeXml(alt.href)}"/>${nl}`;
      }
    }

    xml += `${indent}</url>${nl}`;
    return xml;
  }

  /**
   * Write the XML header and opening urlset tag.
   * Namespace declarations are based on what's been detected so far, i.e. the
   * prefixes collected in `namespaces` at the time of the call.
   */
  private writeHeader(): void {
    this.headerWritten = true;
    const nl = this.options.prettyPrint !== false ? '\n' : '';

    let header = `<?xml version="1.0" encoding="UTF-8"?>${nl}`;

    if (this.options.xslUrl) {
      header += `<?xml-stylesheet type="text/xsl" href="${XmlRenderer.escapeXml(this.options.xslUrl)}"?>${nl}`;
    }

    header += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"';
    for (const [prefix, uri] of SitemapStream.NAMESPACE_URIS) {
      if (this.namespaces.has(prefix)) {
        header += `${nl} xmlns:${prefix}="${uri}"`;
      }
    }
    header += `>${nl}`;

    this.push(header);
  }
}
|