// File: smartsitemap/ts/smartsitemap.classes.urlsetbuilder.ts
// (TypeScript, 275 lines, 9.2 KiB)

import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
import { SitemapValidator } from './smartsitemap.classes.validator.js';
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
import { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
import { SitemapStream } from './smartsitemap.classes.sitemapstream.js';
/**
 * Chainable builder for creating standard XML sitemaps (<urlset>).
 * Every synchronous mutating method returns `this` for fluent chaining;
 * the async import methods resolve to `this`.
 *
 * URL entries are kept in insertion order in an internal array. Rendering,
 * validation and statistics are delegated to `XmlRenderer` and
 * `SitemapValidator`; `toSitemapSet()` splits the output into multiple
 * files plus an index when the URL count exceeds the per-file limit
 * (capped at 50,000).
 */
export class UrlsetBuilder {
  /** URL entries collected so far, in insertion order. */
  protected urls: interfaces.ISitemapUrl[] = [];
  /** Effective options: the defaults below overlaid with caller-supplied values. */
  protected options: interfaces.ISitemapOptions;

  /**
   * @param options Optional settings. Any key supplied here overrides the
   *   defaults `prettyPrint: true`, `maxUrlsPerSitemap: 50000`, `validate: true`.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    this.options = {
      prettyPrint: true,
      maxUrlsPerSitemap: 50000,
      validate: true,
      ...options,
    };
  }

  // ──────────────────────────────────────────────
  // Adding URLs
  // ──────────────────────────────────────────────

  /** Add a single, fully specified URL entry. */
  add(url: interfaces.ISitemapUrl): this {
    this.urls.push(url);
    return this;
  }

  /**
   * Add a URL by its `loc` string.
   *
   * @param loc The URL location.
   * @param lastmod Optional last-modification value; the `lastmod` key is
   *   only set on the entry when this is neither `null` nor `undefined`.
   */
  addUrl(loc: string, lastmod?: Date | string | number): this {
    const url: interfaces.ISitemapUrl = { loc };
    if (lastmod != null) {
      url.lastmod = lastmod;
    }
    this.urls.push(url);
    return this;
  }

  /** Add multiple URL entries, preserving their order. */
  addUrls(urls: interfaces.ISitemapUrl[]): this {
    this.appendUrls(urls);
    return this;
  }

  /** Add one entry per string in `locs`, each carrying only a `loc`. */
  addFromArray(locs: string[]): this {
    for (const loc of locs) {
      this.urls.push({ loc });
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Bulk operations
  // ──────────────────────────────────────────────

  /** Append all URL entries currently held by another builder. */
  merge(other: UrlsetBuilder): this {
    // getUrls() returns a copy, so merging a builder into itself is safe.
    this.appendUrls(other.getUrls());
    return this;
  }

  /** Keep only the entries for which `predicate` returns true. */
  filter(predicate: (url: interfaces.ISitemapUrl) => boolean): this {
    this.urls = this.urls.filter(predicate);
    return this;
  }

  /** Replace every entry with the result of `transform`. */
  map(transform: (url: interfaces.ISitemapUrl) => interfaces.ISitemapUrl): this {
    this.urls = this.urls.map(transform);
    return this;
  }

  /**
   * Sort the entries in place.
   *
   * @param compareFn Optional comparator; when omitted, entries are ordered
   *   by `loc` using `String.prototype.localeCompare`.
   */
  sort(compareFn?: (a: interfaces.ISitemapUrl, b: interfaces.ISitemapUrl) => number): this {
    this.urls.sort(compareFn ?? ((a, b) => a.loc.localeCompare(b.loc)));
    return this;
  }

  /** Remove entries whose `loc` was already seen; the first occurrence wins. */
  dedupe(): this {
    const seen = new Set<string>();
    this.urls = this.urls.filter((url) => {
      if (seen.has(url.loc)) return false;
      seen.add(url.loc);
      return true;
    });
    return this;
  }

  // ──────────────────────────────────────────────
  // Defaults
  // ──────────────────────────────────────────────

  /** Store the default changefreq in the builder options. */
  setDefaultChangeFreq(freq: interfaces.TChangeFreq): this {
    this.options.defaultChangeFreq = freq;
    return this;
  }

  /** Store the default priority in the builder options. */
  setDefaultPriority(priority: number): this {
    this.options.defaultPriority = priority;
    return this;
  }

  /** Store the XSL stylesheet URL in the builder options. */
  setXslUrl(url: string): this {
    this.options.xslUrl = url;
    return this;
  }

  // ──────────────────────────────────────────────
  // Import sources (async, return Promise<this>)
  // ──────────────────────────────────────────────

  /** Append the entries that `FeedImporter.fromUrl` produces for a feed URL. */
  async importFromFeedUrl(feedUrl: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    this.appendUrls(await FeedImporter.fromUrl(feedUrl, options));
    return this;
  }

  /** Append the entries that `FeedImporter.fromString` produces for a feed XML string. */
  async importFromFeedString(feedXml: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    this.appendUrls(await FeedImporter.fromString(feedXml, options));
    return this;
  }

  /** Append the entries that `YamlImporter.parseConfig` produces for a YAML string. */
  async importFromYaml(yamlString: string): Promise<this> {
    this.appendUrls(await YamlImporter.parseConfig(yamlString));
    return this;
  }

  /**
   * Import from an @tsclass/tsclass IArticle array.
   * Each article contributes `loc` from `article.url`; `lastmod` is set to
   * `new Date(article.timestamp)` only when the timestamp is truthy, and is
   * otherwise left off the entry (matching `addUrl`).
   */
  importFromArticles(articles: plugins.tsclass.content.IArticle[]): this {
    for (const article of articles) {
      const url: interfaces.ISitemapUrl = { loc: article.url };
      if (article.timestamp) {
        url.lastmod = new Date(article.timestamp);
      }
      this.urls.push(url);
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Output
  // ──────────────────────────────────────────────

  /** Render all entries as a single sitemap XML string. */
  toXml(): string {
    return XmlRenderer.renderUrlset(this.urls, this.options);
  }

  /** Render all entries via `XmlRenderer.renderTxt` (plain-text output). */
  toTxt(): string {
    return XmlRenderer.renderTxt(this.urls);
  }

  /** Render all entries via `XmlRenderer.renderJson` (JSON string output). */
  toJson(): string {
    return XmlRenderer.renderJson(this.urls);
  }

  /** Render the sitemap XML and gzip its UTF-8 bytes. */
  async toGzipBuffer(): Promise<Buffer> {
    const xml = this.toXml();
    const gzip = plugins.promisify(plugins.zlib.gzip);
    return gzip(Buffer.from(xml, 'utf-8')) as Promise<Buffer>;
  }

  /**
   * Export with automatic index splitting.
   *
   * When the URL count fits within the per-file limit, the result holds a
   * single `sitemap.xml` and `indexXml` is `null`. Otherwise the entries are
   * split into consecutive chunks named `sitemap-1.xml`, `sitemap-2.xml`, …
   * and `indexXml` lists them; each index `loc` is prefixed with
   * `options.baseUrl` (minus one trailing slash) when a base URL is set,
   * and is the bare filename otherwise.
   */
  toSitemapSet(): interfaces.ISitemapSet {
    const maxUrls = this.resolveMaxUrlsPerSitemap();

    if (this.urls.length <= maxUrls) {
      return {
        needsIndex: false,
        indexXml: null,
        sitemaps: [{ filename: 'sitemap.xml', xml: this.toXml() }],
      };
    }

    const baseUrl = this.options.baseUrl || '';
    const locPrefix = baseUrl ? `${baseUrl.replace(/\/$/, '')}/` : '';
    const sitemaps: Array<{ filename: string; xml: string }> = [];
    const indexEntries: Array<{ loc: string; lastmod?: string }> = [];

    // maxUrls is a positive integer here, so this loop always terminates.
    for (let start = 0; start < this.urls.length; start += maxUrls) {
      const filename = `sitemap-${sitemaps.length + 1}.xml`;
      const chunk = this.urls.slice(start, start + maxUrls);
      sitemaps.push({ filename, xml: XmlRenderer.renderUrlset(chunk, this.options) });
      indexEntries.push({ loc: `${locPrefix}${filename}` });
    }

    return {
      needsIndex: true,
      indexXml: XmlRenderer.renderIndex(indexEntries, this.options),
      sitemaps,
    };
  }

  /**
   * Create a `SitemapStream` fed with this builder's entries.
   *
   * The entries are pushed on the next tick, so the stream is returned to
   * the caller before any data is written to it. The entries pushed are the
   * ones the builder holds at that time.
   */
  toStream(): SitemapStream {
    // `require` does not exist in ES modules; the statically imported class is
    // only referenced here, inside a method body, never at module load time.
    const stream = new SitemapStream(this.options);
    process.nextTick(() => {
      for (const url of this.urls) {
        stream.pushUrl(url);
      }
      stream.finish();
    });
    return stream;
  }

  // ──────────────────────────────────────────────
  // Inspection
  // ──────────────────────────────────────────────

  /** Get a shallow copy of the URL array (the entry objects are shared). */
  getUrls(): interfaces.ISitemapUrl[] {
    return [...this.urls];
  }

  /** Get the number of URLs */
  get count(): number {
    return this.urls.length;
  }

  /** Run `SitemapValidator.validateUrlset` on the current entries and options. */
  validate(): interfaces.IValidationResult {
    return SitemapValidator.validateUrlset(this.urls, this.options);
  }

  /** Run `SitemapValidator.computeStats` on the current entries and options. */
  stats(): interfaces.ISitemapStats {
    return SitemapValidator.computeStats(this.urls, this.options);
  }

  /** Get a shallow copy of this builder's options. */
  getOptions(): interfaces.ISitemapOptions {
    return { ...this.options };
  }

  // ──────────────────────────────────────────────
  // Internal helpers
  // ──────────────────────────────────────────────

  /**
   * Append entries one at a time.
   * `push(...items)` passes every element as a call argument and throws a
   * RangeError ("Maximum call stack size exceeded") for very large inputs.
   */
  private appendUrls(items: Iterable<interfaces.ISitemapUrl>): void {
    // Iterating the live array while pushing to it would never finish.
    const source = items === this.urls ? [...this.urls] : items;
    for (const item of source) {
      this.urls.push(item);
    }
  }

  /**
   * Resolve the per-file URL limit as a positive integer no larger than 50,000.
   * Missing, non-finite or sub-1 values fall back to 50,000; fractions are floored.
   */
  private resolveMaxUrlsPerSitemap(): number {
    const hardLimit = 50000;
    const configured = this.options.maxUrlsPerSitemap;
    if (typeof configured !== 'number' || !Number.isFinite(configured) || configured < 1) {
      return hardLimit;
    }
    return Math.min(Math.floor(configured), hardLimit);
  }
}