BREAKING CHANGE(api): redesign smartsitemap around builder-based sitemap creation, parsing, validation, and import utilities
This commit is contained in:
@@ -3,6 +3,6 @@
|
||||
*/
|
||||
// Package metadata for this release. Only the post-commit version and
// description are kept; the previous pair (2.0.4 and the old description)
// duplicated the same keys and left the literal without a separating comma.
export const commitinfo = {
  name: '@push.rocks/smartsitemap',
  version: '4.0.0',
  description: 'A comprehensive TypeScript sitemap library with builder API, supporting standard, news, image, video, and hreflang sitemaps with auto-splitting, streaming, validation, and RSS feed integration.',
};
|
||||
|
||||
47
ts/index.ts
47
ts/index.ts
@@ -1,3 +1,44 @@
|
||||
export * from './smartsitemap.classes.smartsitemap.js';
|
||||
export * from './smartsitemap.classes.sitemapnews.js';
|
||||
export * from './smartsitemap.classes.sitemapwebsite.js';
|
||||
// Main facade
|
||||
export { SmartSitemap } from './smartsitemap.classes.smartsitemap.js';
|
||||
|
||||
// Builders
|
||||
export { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
|
||||
export { NewsSitemapBuilder } from './smartsitemap.classes.newsbuilder.js';
|
||||
export { SitemapIndexBuilder } from './smartsitemap.classes.indexbuilder.js';
|
||||
|
||||
// Parser
|
||||
export { SitemapParser } from './smartsitemap.classes.sitemapparser.js';
|
||||
|
||||
// Stream
|
||||
export { SitemapStream } from './smartsitemap.classes.sitemapstream.js';
|
||||
|
||||
// Utilities
|
||||
export { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
|
||||
export { SitemapValidator } from './smartsitemap.classes.validator.js';
|
||||
|
||||
// Feed & YAML importers
|
||||
export { FeedImporter } from './smartsitemap.classes.feedimporter.js';
|
||||
export { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
|
||||
|
||||
// All interfaces and types
|
||||
export type {
|
||||
TChangeFreq,
|
||||
TOutputFormat,
|
||||
ISitemapUrl,
|
||||
ISitemapImage,
|
||||
ISitemapVideo,
|
||||
ISitemapNews,
|
||||
ISitemapAlternate,
|
||||
ISitemapIndexEntry,
|
||||
ISitemapOptions,
|
||||
INewsSitemapOptions,
|
||||
IFeedImportOptions,
|
||||
IFeedItem,
|
||||
ISitemapYamlConfig,
|
||||
IParsedSitemap,
|
||||
IValidationError,
|
||||
IValidationWarning,
|
||||
IValidationResult,
|
||||
ISitemapStats,
|
||||
ISitemapSet,
|
||||
} from './interfaces/index.js';
|
||||
|
||||
@@ -1,42 +1,277 @@
|
||||
export interface ISitemapYaml {
|
||||
daily: string[];
|
||||
// ============================================================
|
||||
// CORE TYPES
|
||||
// ============================================================
|
||||
|
||||
/**
 * Values permitted for `<changefreq>` by the sitemap protocol.
 * Google disregards this hint, but other search engines may still use it.
 */
export type TChangeFreq = 'always' | 'hourly' | 'daily' | 'weekly' | 'monthly' | 'yearly' | 'never';
|
||||
|
||||
/** Output formats a sitemap can be serialized to. */
export type TOutputFormat =
  | 'xml'
  | 'txt'
  | 'json';
|
||||
|
||||
// ============================================================
|
||||
// URL ENTRY — the core unit of a sitemap
|
||||
// ============================================================
|
||||
|
||||
/**
 * One `<url>` entry of a sitemap, including every supported extension
 * (images, videos, news, hreflang alternates).
 */
export interface ISitemapUrl {
  /** Absolute page URL; mandatory, at most 2048 characters. */
  loc: string;

  /** When the page last changed: a Date, an ISO string, or a Unix timestamp in milliseconds. */
  lastmod?: Date | string | number;

  /** Expected change frequency of the page. */
  changefreq?: TChangeFreq;

  /** Importance relative to the site's other URLs, from 0.0 to 1.0. */
  priority?: number;

  /** Entries for the image sitemap extension. */
  images?: Array<ISitemapImage>;

  /** Entries for the video sitemap extension. */
  videos?: Array<ISitemapVideo>;

  /** Data for the news sitemap extension. */
  news?: ISitemapNews;

  /** Alternate-language versions of the page (hreflang). */
  alternates?: Array<ISitemapAlternate>;
}
|
||||
|
||||
export interface IRssItem {
|
||||
[key: string]: any;
|
||||
link?: string;
|
||||
guid?: string;
|
||||
// ============================================================
|
||||
// SITEMAP EXTENSIONS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Image sitemap extension entry attached to a URL.
 *
 * Only image-related fields belong here. The RSS-item fields of the removed
 * `IRssItem` type (pubDate, creator, content, isoDate, categories,
 * contentSnippet, enclosure) are not part of this interface.
 */
export interface ISitemapImage {
  /** URL of the image (required) */
  loc: string;
  /** Caption for the image */
  caption?: string;
  /** Title of the image */
  title?: string;
  /** Geographic location (e.g. "New York, USA") */
  geoLocation?: string;
  /** URL to the image license */
  licenseUrl?: string;
}
|
||||
|
||||
export interface IParsedSiteMap {
|
||||
urlset: {
|
||||
url:
|
||||
| {
|
||||
loc: string;
|
||||
lastmod: string;
|
||||
changefreq: string;
|
||||
}
|
||||
| {
|
||||
loc: string;
|
||||
lastmod: string;
|
||||
changefreq: string;
|
||||
}[]
|
||||
| {
|
||||
loc: string;
|
||||
'news:news': {
|
||||
'news:publication': [];
|
||||
'news:keywords': string;
|
||||
'news:publication_date': string;
|
||||
'news:title': string;
|
||||
};
|
||||
}[];
|
||||
};
|
||||
/** Video sitemap extension entry attached to a URL. */
export interface ISitemapVideo {
  /** Thumbnail URL for the video; mandatory. */
  thumbnailLoc: string;

  /** Video title; mandatory. */
  title: string;

  /** Video description; mandatory, at most 2048 characters. */
  description: string;

  /** URL of the media file itself. */
  contentLoc?: string;

  /** URL of an embeddable player. At least one of contentLoc or playerLoc is required. */
  playerLoc?: string;

  /** Length of the video in seconds (1–28800). */
  duration?: number;

  /** Rating from 0.0 to 5.0. */
  rating?: number;

  /** How many times the video has been viewed. */
  viewCount?: number;

  /** When the video was published. */
  publicationDate?: Date | string;

  /** Family-friendly flag (defaults to true). */
  familyFriendly?: boolean;

  /** Tags describing the video (at most 32). */
  tags?: Array<string>;

  /** True when the video is a live stream. */
  live?: boolean;

  /** True when viewing requires a subscription. */
  requiresSubscription?: boolean;
}
|
||||
|
||||
/** News sitemap extension data attached to a URL. */
export interface ISitemapNews {
  /** The publication the article appeared in. */
  publication: {
    /** Name of the publication, e.g. "The New York Times". */
    name: string;
    /** ISO 639 language code, e.g. "en", "de", "zh-cn". */
    language: string;
  };

  /** When the article was published. */
  publicationDate: Date | string | number;

  /** Title of the article. */
  title: string;

  /** Keywords, either as a list or as one comma-separated string. */
  keywords?: Array<string> | string;
}
|
||||
|
||||
/** One alternate-language (hreflang) link for a URL. */
export interface ISitemapAlternate {
  /** ISO 639 language code, or 'x-default' to mark the default version. */
  hreflang: string;

  /** URL of the page in that language. */
  href: string;
}
|
||||
|
||||
// ============================================================
|
||||
// SITEMAP INDEX
|
||||
// ============================================================
|
||||
|
||||
/** One `<sitemap>` reference inside a sitemap index. */
export interface ISitemapIndexEntry {
  /** URL of the referenced sitemap file. */
  loc: string;

  /** When the referenced sitemap was last modified. */
  lastmod?: Date | string | number;
}
|
||||
|
||||
// ============================================================
|
||||
// CONFIGURATION
|
||||
// ============================================================
|
||||
|
||||
/** Options shared by the sitemap builders. */
export interface ISitemapOptions {
  /** Website base URL; resolves relative URLs and names auto-split files. */
  baseUrl?: string;

  /** URL of an XSL stylesheet so browsers can display the sitemap. */
  xslUrl?: string;

  /** changefreq applied to URLs that do not set their own. */
  defaultChangeFreq?: TChangeFreq;

  /** priority (0.0–1.0) applied to URLs that do not set their own. */
  defaultPriority?: number;

  /** Pretty-print the XML output (default: true). */
  prettyPrint?: boolean;

  /** URL count per sitemap file before auto-splitting (default: 50000, max: 50000). */
  maxUrlsPerSitemap?: number;

  /** Turn on gzip compression for toGzipBuffer(). */
  gzip?: boolean;

  /** Validate URLs and fields (default: true). */
  validate?: boolean;
}
|
||||
|
||||
/** Builder options for news sitemaps; adds publication details to ISitemapOptions. */
export interface INewsSitemapOptions extends ISitemapOptions {
  /** Name of the publication; mandatory for news sitemaps. */
  publicationName: string;

  /** Language of the publication (default: 'en'). */
  publicationLanguage?: string;
}
|
||||
|
||||
/** Options controlling how feed items are imported into a sitemap. */
export interface IFeedImportOptions {
  /** Publication name used when mapping items to news entries. */
  publicationName?: string;

  /** Publication language used when mapping items to news entries. */
  publicationLanguage?: string;

  /** Keep only items newer than this date. */
  newerThan?: Date | number;

  /** Upper bound on the number of imported items. */
  limit?: number;

  /** Custom feed-item-to-URL mapper; returning null skips the item. */
  mapItem?: (item: IFeedItem) => ISitemapUrl | null;
}
|
||||
|
||||
/**
 * A single item of a parsed RSS/Atom feed.
 * Every named field is optional, and the index signature admits any
 * additional properties.
 */
export interface IFeedItem {
  title?: string;
  link?: string;
  pubDate?: string;
  author?: string;
  content?: string;
  contentSnippet?: string;
  isoDate?: string;
  id?: string;
  categories?: Array<string>;
  enclosure?: {
    url?: string;
    type?: string;
    length?: string;
  };
  // Any further properties an item may carry.
  [key: string]: any;
}
|
||||
|
||||
// ============================================================
|
||||
// YAML CONFIG
|
||||
// ============================================================
|
||||
|
||||
/**
 * Declarative YAML description of a sitemap: URL groups keyed by change
 * frequency, default values, and feeds to import.
 */
export interface ISitemapYamlConfig {
  /** Base URL prepended to relative paths. */
  baseUrl?: string;

  /** Values applied to every URL by default. */
  defaults?: {
    changefreq?: TChangeFreq;
    priority?: number;
  };

  /** URL lists grouped by their change frequency. */
  urls?: Partial<Record<TChangeFreq, string[]>>;

  /** RSS/Atom feeds whose items should be imported. */
  feeds?: {
    url: string;
    type: 'news' | 'standard';
    publicationName?: string;
    publicationLanguage?: string;
  }[];
}
|
||||
|
||||
// ============================================================
|
||||
// PARSED SITEMAP (bidirectional)
|
||||
// ============================================================
|
||||
|
||||
/** Result of parsing a sitemap document. */
export interface IParsedSitemap {
  /** Kind of document that was parsed: a urlset or a sitemap index. */
  type: 'urlset' | 'sitemapindex';

  /** URL entries; filled when `type` is 'urlset'. */
  urls: Array<ISitemapUrl>;

  /** Index entries; filled when `type` is 'sitemapindex'. */
  sitemaps: Array<ISitemapIndexEntry>;
}
|
||||
|
||||
// ============================================================
|
||||
// VALIDATION
|
||||
// ============================================================
|
||||
|
||||
/**
 * A validation error record.
 * NOTE(review): presumably `field` names the offending field and `url` the
 * entry it belongs to — confirm against the validator that produces these.
 */
export interface IValidationError {
  field: string;
  message: string;
  url?: string;
}
|
||||
|
||||
/**
 * A validation warning record; it has the same three fields as IValidationError.
 * NOTE(review): field semantics are not visible here — confirm against the
 * validator that produces these.
 */
export interface IValidationWarning {
  field: string;
  message: string;
  url?: string;
}
|
||||
|
||||
/** Outcome of a validation run: overall verdict, findings, and statistics. */
export interface IValidationResult {
  valid: boolean;
  errors: Array<IValidationError>;
  warnings: Array<IValidationWarning>;
  stats: ISitemapStats;
}
|
||||
|
||||
// ============================================================
|
||||
// STATISTICS
|
||||
// ============================================================
|
||||
|
||||
/**
 * Numeric summary of a sitemap.
 * NOTE(review): how each counter and `estimatedSizeBytes` is computed is
 * decided by the code that fills this structure, which is not visible here.
 */
export interface ISitemapStats {
  urlCount: number;
  imageCount: number;
  videoCount: number;
  newsCount: number;
  alternateCount: number;
  estimatedSizeBytes: number;
  needsIndex: boolean;
}
|
||||
|
||||
// ============================================================
|
||||
// AUTO-SPLIT OUTPUT
|
||||
// ============================================================
|
||||
|
||||
/** Output of auto-splitting: the individual sitemap files plus an optional index. */
export interface ISitemapSet {
  /** True when the number of URLs exceeded maxUrlsPerSitemap. */
  needsIndex: boolean;

  /** XML of the sitemap index, or null when everything fits into one sitemap. */
  indexXml: string | null;

  /** The sitemap chunks, each with its filename and XML. */
  sitemaps: { filename: string; xml: string }[];
}
|
||||
|
||||
159
ts/smartsitemap.classes.feedimporter.ts
Normal file
159
ts/smartsitemap.classes.feedimporter.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
|
||||
/**
 * Imports RSS/Atom feeds and converts their items to sitemap URL entries.
 *
 * Feeds are parsed with `plugins.smartfeed.Smartfeed`. Items are first
 * filtered by `newerThan` and `limit`, then mapped either by the caller's
 * `mapItem` function or by the built-in standard/news mapping.
 */
export class FeedImporter {
  /**
   * Import from a feed URL, returning standard sitemap URL entries.
   * @param feedUrl URL of the RSS/Atom feed to fetch
   * @param options optional filters and custom mapper
   */
  static async fromUrl(
    feedUrl: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromUrl(feedUrl);
    return FeedImporter.mapItems(feed.items, options);
  }

  /**
   * Import from a feed XML string, returning standard sitemap URL entries.
   * @param feedXml raw feed document
   * @param options optional filters and custom mapper
   */
  static async fromString(
    feedXml: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromString(feedXml);
    return FeedImporter.mapItems(feed.items, options);
  }

  /**
   * Import from a feed URL, returning news sitemap URL entries.
   * @param feedUrl URL of the RSS/Atom feed to fetch
   * @param publicationName publication name written into every news entry
   * @param publicationLanguage publication language; falls back to 'en'
   * @param options optional filters and custom mapper
   */
  static async fromUrlAsNews(
    feedUrl: string,
    publicationName: string,
    publicationLanguage?: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromUrl(feedUrl);
    return FeedImporter.mapItemsAsNews(feed.items, publicationName, publicationLanguage ?? 'en', options);
  }

  /**
   * Import from a feed string, returning news sitemap URL entries.
   * @param feedXml raw feed document
   * @param publicationName publication name written into every news entry
   * @param publicationLanguage publication language; falls back to 'en'
   * @param options optional filters and custom mapper
   */
  static async fromStringAsNews(
    feedXml: string,
    publicationName: string,
    publicationLanguage?: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromString(feedXml);
    return FeedImporter.mapItemsAsNews(feed.items, publicationName, publicationLanguage ?? 'en', options);
  }

  /**
   * Map parsed feed items to standard sitemap URLs.
   * Items without a `link` are skipped; `isoDate`, when present, becomes `lastmod`.
   */
  private static mapItems(
    items: any[],
    options?: interfaces.IFeedImportOptions,
  ): interfaces.ISitemapUrl[] {
    const filtered = FeedImporter.filterItems(items, options);

    if (options?.mapItem) {
      return FeedImporter.applyCustomMapper(filtered, options.mapItem);
    }

    return filtered
      .filter((item: any) => item.link)
      .map((item: any) => {
        const url: interfaces.ISitemapUrl = { loc: item.link };
        if (item.isoDate) {
          url.lastmod = item.isoDate;
        }
        return url;
      });
  }

  /**
   * Map parsed feed items to news sitemap URLs.
   * A custom `mapItem` takes precedence over the built-in news mapping.
   * Items without `isoDate` get the current time as their publication date.
   */
  private static mapItemsAsNews(
    items: any[],
    publicationName: string,
    publicationLanguage: string,
    options?: interfaces.IFeedImportOptions,
  ): interfaces.ISitemapUrl[] {
    const filtered = FeedImporter.filterItems(items, options);

    if (options?.mapItem) {
      return FeedImporter.applyCustomMapper(filtered, options.mapItem);
    }

    return filtered
      .filter((item: any) => item.link)
      .map((item: any) => {
        const url: interfaces.ISitemapUrl = {
          loc: item.link,
          news: {
            publication: {
              name: publicationName,
              language: publicationLanguage,
            },
            publicationDate: item.isoDate || new Date().toISOString(),
            title: item.title || '',
            keywords: item.categories,
          },
        };
        if (item.isoDate) {
          url.lastmod = item.isoDate;
        }
        return url;
      });
  }

  /**
   * Run the caller-supplied mapper over the items, dropping every item for
   * which it returns a falsy value (null means "skip").
   */
  private static applyCustomMapper(
    items: any[],
    mapItem: (item: interfaces.IFeedItem) => interfaces.ISitemapUrl | null,
  ): interfaces.ISitemapUrl[] {
    const results: interfaces.ISitemapUrl[] = [];
    for (const item of items) {
      const mapped = mapItem(item as interfaces.IFeedItem);
      if (mapped) results.push(mapped);
    }
    return results;
  }

  /**
   * Apply date and limit filters to feed items; the input array is never mutated.
   *
   * - `newerThan`: keeps items whose `isoDate` is at or after the threshold.
   *   Items with no date, or with a date that cannot be parsed, are kept.
   * - `limit`: applied after the date filter; ignored unless greater than 0.
   */
  private static filterItems(items: any[], options?: interfaces.IFeedImportOptions): any[] {
    // A feed may come back without an items array; treat that as "no items".
    let result = [...(items ?? [])];

    // Filter by date
    if (options?.newerThan != null) {
      const threshold = options.newerThan instanceof Date
        ? options.newerThan.getTime()
        : options.newerThan;

      result = result.filter((item: any) => {
        if (!item.isoDate) return true; // keep items without dates
        const time = new Date(item.isoDate).getTime();
        // NaN compares false against everything, which would silently drop
        // the item; treat an unparseable date like a missing one instead.
        return Number.isNaN(time) || time >= threshold;
      });
    }

    // Apply limit
    if (options?.limit != null && options.limit > 0) {
      result = result.slice(0, options.limit);
    }

    return result;
  }
}
|
||||
82
ts/smartsitemap.classes.indexbuilder.ts
Normal file
82
ts/smartsitemap.classes.indexbuilder.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
|
||||
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
|
||||
|
||||
/**
 * Builder for sitemap index files (<sitemapindex>).
 * Used when several sitemaps need to be referenced from a single index.
 * Every mutating method returns `this` for fluent chaining.
 */
export class SitemapIndexBuilder {
  private entries: interfaces.ISitemapIndexEntry[] = [];
  private options: interfaces.ISitemapOptions;

  /**
   * @param options builder options, passed to the XML renderer by `toXml()`
   */
  constructor(options?: interfaces.ISitemapOptions) {
    this.options = options ?? {};
  }

  /** Add a sitemap index entry */
  add(entry: interfaces.ISitemapIndexEntry): this {
    this.entries.push(entry);
    return this;
  }

  /**
   * Add a sitemap by URL, optionally with lastmod.
   * `lastmod` is only set on the entry when it is neither null nor undefined.
   */
  addSitemap(loc: string, lastmod?: Date | string | number): this {
    const entry: interfaces.ISitemapIndexEntry = { loc };
    if (lastmod != null) {
      entry.lastmod = lastmod;
    }
    this.entries.push(entry);
    return this;
  }

  /** Add multiple sitemap entries */
  addSitemaps(entries: interfaces.ISitemapIndexEntry[]): this {
    // Appending one by one avoids spreading a very large array into
    // push(), which is limited by the engine's argument count.
    for (const entry of entries) {
      this.entries.push(entry);
    }
    return this;
  }

  /**
   * Build an index and individual sitemaps from a UrlsetBuilder that needs splitting.
   * The builder's URLs are divided into chunks of maxUrlsPerSitemap, and each
   * chunk is referenced from the index as `<baseUrl>/sitemap-<n>.xml` (n starts at 1).
   *
   * @param builder source builder; its URLs and options are read, not modified here
   * @param baseUrl prefix for the generated sitemap URLs; one trailing slash is removed
   * @returns the index builder and the chunk builders, in matching order
   */
  static fromBuilder(
    builder: UrlsetBuilder,
    baseUrl: string,
  ): { index: SitemapIndexBuilder; sitemaps: UrlsetBuilder[] } {
    const protocolLimit = 50000;
    const urls = builder.getUrls();
    const options = builder.getOptions();

    // The chunk size is also the loop step: 0 or a negative value would never
    // terminate, and a fractional or NaN value would yield empty or uneven
    // chunks. Anything that is not a finite number >= 1 falls back to the limit.
    const requested = options.maxUrlsPerSitemap;
    const maxUrls =
      typeof requested === 'number' && Number.isFinite(requested) && requested >= 1
        ? Math.min(Math.floor(requested), protocolLimit)
        : protocolLimit;

    // Loop-invariant, so normalise the prefix once.
    const prefix = baseUrl.replace(/\/$/, '');

    const index = new SitemapIndexBuilder(options);
    const sitemaps: UrlsetBuilder[] = [];

    for (let i = 0; i < urls.length; i += maxUrls) {
      const chunkBuilder = new UrlsetBuilder(options);
      chunkBuilder.addUrls(urls.slice(i, i + maxUrls));
      sitemaps.push(chunkBuilder);

      // Filenames are 1-based: the chunk just pushed gives the current number.
      index.addSitemap(`${prefix}/sitemap-${sitemaps.length}.xml`);
    }

    return { index, sitemaps };
  }

  /** Export as sitemap index XML string */
  toXml(): string {
    return XmlRenderer.renderIndex(this.entries, this.options);
  }

  /** Get all entries (a shallow copy of the internal list) */
  getEntries(): interfaces.ISitemapIndexEntry[] {
    return [...this.entries];
  }

  /** Get the number of sitemaps in this index */
  get count(): number {
    return this.entries.length;
  }
}
|
||||
95
ts/smartsitemap.classes.newsbuilder.ts
Normal file
95
ts/smartsitemap.classes.newsbuilder.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
|
||||
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
|
||||
|
||||
/**
 * UrlsetBuilder specialised for Google News sitemaps.
 * Adds news-oriented helpers on top of the inherited builder methods and
 * remembers the publication name/language given at construction time.
 */
export class NewsSitemapBuilder extends UrlsetBuilder {
  private publicationName: string;
  private publicationLanguage: string;

  /**
   * @param options news sitemap options; `publicationLanguage` defaults to 'en'
   */
  constructor(options: interfaces.INewsSitemapOptions) {
    super(options);
    const { publicationName, publicationLanguage } = options;
    this.publicationName = publicationName;
    this.publicationLanguage = publicationLanguage ?? 'en';
  }

  /**
   * Add one news article.
   * The publication name and language come from the constructor options.
   *
   * @param loc article URL
   * @param title article title
   * @param publicationDate when the article was published
   * @param keywords optional keywords (list or comma-separated string)
   */
  addNewsUrl(
    loc: string,
    title: string,
    publicationDate: Date | string | number,
    keywords?: string[] | string,
  ): this {
    const news: interfaces.ISitemapNews = {
      publication: {
        name: this.publicationName,
        language: this.publicationLanguage,
      },
      publicationDate,
      title,
      keywords,
    };
    this.add({ loc, news });
    return this;
  }

  /**
   * Fetch an RSS/Atom feed and add its items as news entries.
   * Publication name/language from `options` override the builder's own.
   */
  async importFromFeedUrl(feedUrl: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const name = options?.publicationName ?? this.publicationName;
    const language = options?.publicationLanguage ?? this.publicationLanguage;
    const newsUrls = await FeedImporter.fromUrlAsNews(feedUrl, name, language, options);
    this.addUrls(newsUrls);
    return this;
  }

  /**
   * Parse an RSS/Atom feed string and add its items as news entries.
   * Publication name/language from `options` override the builder's own.
   */
  async importFromFeedString(feedXml: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const name = options?.publicationName ?? this.publicationName;
    const language = options?.publicationLanguage ?? this.publicationLanguage;
    const newsUrls = await FeedImporter.fromStringAsNews(feedXml, name, language, options);
    this.addUrls(newsUrls);
    return this;
  }

  /**
   * Add @tsclass/tsclass IArticle objects as news entries.
   * An article with a falsy timestamp gets no lastmod and the current time
   * as its publication date.
   */
  importFromArticles(articles: plugins.tsclass.content.IArticle[]): this {
    articles.forEach((article) => {
      this.add({
        loc: article.url,
        lastmod: article.timestamp ? new Date(article.timestamp) : undefined,
        news: {
          publication: {
            name: this.publicationName,
            language: this.publicationLanguage,
          },
          publicationDate: article.timestamp ? new Date(article.timestamp) : new Date(),
          title: article.title || '',
          keywords: article.tags,
        },
      });
    });
    return this;
  }
}
|
||||
@@ -1,79 +0,0 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import * as interfaces from './interfaces/index.js';
|
||||
|
||||
/**
 * Collects RSS-style items from feeds or article objects and exports them as
 * a Google News sitemap XML string.
 */
export class SitemapNews {
  /** Items gathered so far; every read* method appends to this list. */
  public rssItems: interfaces.IRssItem[] = [];

  /** @param optionsArg currently unused */
  constructor(optionsArg: {}) {}

  /** Parse a feed from its XML string and append its items. */
  public async readAndAddFromRssFeedString(feedStringArg: string) {
    const smartfeedInstance = new plugins.smartfeed.Smartfeed();
    const parsedFeed =
      await smartfeedInstance.parseFeedFromString(feedStringArg);
    this.rssItems = this.rssItems.concat(parsedFeed.items);
  }

  /** Fetch and parse a feed from a URL and append its items. */
  public async readAndAddFromRssFeedUrl(urlArg: string) {
    const smartfeedInstance = new plugins.smartfeed.Smartfeed();
    const parsedFeed = await smartfeedInstance.parseFeedFromUrl(urlArg);
    this.rssItems = this.rssItems.concat(parsedFeed.items);
  }

  /**
   * Convert articles to RSS-style items and append them.
   * The item date is taken from the article's timestamp; the current time is
   * used only when the article has none.
   */
  public async readAndParseArticles(
    articleArrayArg: plugins.tsclass.content.IArticle[],
  ) {
    const rssItemArray = articleArrayArg.map(
      (articleArg): interfaces.IRssItem => {
        const published = articleArg.timestamp
          ? new Date(articleArg.timestamp)
          : new Date();
        return {
          title: articleArg.title,
          content: articleArg.content,
          isoDate: published.toISOString(),
          link: articleArg.url,
        };
      },
    );

    this.rssItems = this.rssItems.concat(rssItemArray);
  }

  /**
   * Render all collected items as a news sitemap XML string.
   * NOTE(review): publication name and language are hard-coded placeholders
   * ('some name', 'en') and keywords are always empty.
   */
  public exportSitemapXml() {
    const urls: {
      loc: string;
      'news:news': {
        'news:publication': {
          'news:name': string;
          'news:language': string;
        };
        'news:publication_date': string;
        'news:keywords': string;
        'news:title': string;
      };
    }[] = [];
    for (const itemArg of this.rssItems) {
      urls.push({
        loc: itemArg.link,
        'news:news': {
          'news:publication': {
            'news:language': 'en',
            'news:name': 'some name',
          },
          'news:keywords': '',
          'news:publication_date': itemArg.isoDate,
          'news:title': itemArg.title,
        },
      });
    }
    const sitemapObject: any = {
      urlset: {
        '@_xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        '@_xmlns:news': 'http://www.google.com/schemas/sitemap-news/0.9',
        url: urls,
      },
    };
    const smartxmlInstance = new plugins.smartxml.SmartXml();
    const sitemapString = smartxmlInstance.createXmlFromObject(sitemapObject);
    return sitemapString;
  }
}
|
||||
251
ts/smartsitemap.classes.sitemapparser.ts
Normal file
251
ts/smartsitemap.classes.sitemapparser.ts
Normal file
@@ -0,0 +1,251 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
|
||||
|
||||
/**
|
||||
* Parses existing sitemap XML into structured data.
|
||||
* Handles both <urlset> sitemaps and <sitemapindex> files.
|
||||
*/
|
||||
export class SitemapParser {
|
||||
/**
 * Turn sitemap XML text into structured data.
 * @param xml raw sitemap XML
 * @returns the parsed sitemap (see `processOrderedParsed` for the shape rules)
 */
static async parse(xml: string): Promise<interfaces.IParsedSitemap> {
  // smartxml returns the ordered format (preserveOrder: true); the walker
  // below extracts the url or sitemap entries from that structure.
  const orderedNodes = new plugins.smartxml.SmartXml().parseXmlToObject(xml);
  return SitemapParser.processOrderedParsed(orderedNodes);
}
|
||||
|
||||
/**
 * Download a sitemap and parse it.
 * @param url location of the sitemap document
 * @returns the parsed sitemap
 */
static async parseUrl(url: string): Promise<interfaces.IParsedSitemap> {
  const body = await (await plugins.webrequest.webrequest(url)).text();
  return SitemapParser.parse(body);
}
|
||||
|
||||
/**
 * Parse sitemap XML and load its URL entries into a new UrlsetBuilder.
 * @param xml raw sitemap XML
 * @param options options handed to the UrlsetBuilder constructor
 * @returns a builder containing the parsed `urls`
 */
static async toBuilder(xml: string, options?: interfaces.ISitemapOptions): Promise<UrlsetBuilder> {
  const { urls } = await SitemapParser.parse(xml);
  const populated = new UrlsetBuilder(options);
  populated.addUrls(urls);
  return populated;
}
|
||||
|
||||
/**
 * Detect whether XML is a urlset or sitemapindex without full parsing.
 *
 * The answer is the first `<urlset` or `<sitemapindex` opening tag found
 * outside of XML comments. The tag name must be followed by whitespace, `/`,
 * `>` or the end of the input, so a longer name that merely starts with
 * "urlset" does not count.
 *
 * @param xml raw XML text
 * @returns 'urlset', 'sitemapindex', or 'unknown' when neither tag is present
 */
static detectType(xml: string): 'urlset' | 'sitemapindex' | 'unknown' {
  // Commented-out markup must not be mistaken for the document root.
  const withoutComments = xml.replace(/<!--[\s\S]*?-->/g, '');
  const rootTag = /<(urlset|sitemapindex)(?=[\s\/>]|$)/.exec(withoutComments);
  if (!rootTag) return 'unknown';
  return rootTag[1] === 'urlset' ? 'urlset' : 'sitemapindex';
}
|
||||
|
||||
/**
 * Walk the ordered-format output of smartxml's parseXmlToObject.
 * In that format each level is an array of objects, each keyed by a tag name.
 *
 * The result starts out as an empty 'urlset'. Input that is not an array is
 * returned as that empty result. When several root nodes match, the last one
 * determines `type`.
 */
private static processOrderedParsed(parsed: any[]): interfaces.IParsedSitemap {
  let type: interfaces.IParsedSitemap['type'] = 'urlset';
  let urls: interfaces.ISitemapUrl[] = [];
  let sitemaps: interfaces.ISitemapIndexEntry[] = [];

  if (Array.isArray(parsed)) {
    parsed.forEach((rootNode) => {
      if (rootNode.urlset) {
        type = 'urlset';
        urls = SitemapParser.extractUrls(rootNode.urlset);
        return;
      }
      if (rootNode.sitemapindex) {
        type = 'sitemapindex';
        sitemaps = SitemapParser.extractIndexEntries(rootNode.sitemapindex);
      }
    });
  }

  return { type, urls, sitemaps };
}
|
||||
|
||||
/**
 * Collect the URL entries of an ordered-format urlset.
 * Nodes without a `url` key and entries that fail extraction are skipped.
 * Input that is not an array yields an empty list.
 */
private static extractUrls(urlsetNodes: any[]): interfaces.ISitemapUrl[] {
  if (!Array.isArray(urlsetNodes)) return [];

  return urlsetNodes.flatMap((child): interfaces.ISitemapUrl[] => {
    if (!child.url) return [];
    const entry = SitemapParser.extractUrlData(child.url);
    return entry ? [entry] : [];
  });
}
|
||||
|
||||
/**
 * Build one URL entry from the ordered-format children of a `<url>` element.
 *
 * Each child is matched against the known tags in a fixed order, and only
 * the first matching tag is applied per child.
 *
 * @returns the entry, or null when the input is not an array or no non-empty
 *          `loc` was found
 */
private static extractUrlData(urlNodes: any[]): interfaces.ISitemapUrl | null {
  if (!Array.isArray(urlNodes)) return null;

  const entry: interfaces.ISitemapUrl = { loc: '' };

  for (const child of urlNodes) {
    if (child.loc) {
      entry.loc = SitemapParser.extractText(child.loc);
      continue;
    }
    if (child.lastmod) {
      entry.lastmod = SitemapParser.extractText(child.lastmod);
      continue;
    }
    if (child.changefreq) {
      entry.changefreq = SitemapParser.extractText(child.changefreq) as interfaces.TChangeFreq;
      continue;
    }
    if (child.priority) {
      entry.priority = parseFloat(SitemapParser.extractText(child.priority));
      continue;
    }
    if (child['image:image']) {
      entry.images = entry.images ?? [];
      entry.images.push(SitemapParser.extractImageData(child['image:image']));
      continue;
    }
    if (child['video:video']) {
      entry.videos = entry.videos ?? [];
      entry.videos.push(SitemapParser.extractVideoData(child['video:video']));
      continue;
    }
    if (child['news:news']) {
      entry.news = SitemapParser.extractNewsData(child['news:news']);
      continue;
    }
    if (child['xhtml:link']) {
      // The list is created as soon as a link element is seen, even if the
      // link turns out to lack the attributes needed to record it.
      entry.alternates = entry.alternates ?? [];
      // Attributes of the element live under the ':@' key of the same node.
      const attributes = child[':@'] || {};
      const hreflang = attributes['@_hreflang'];
      const href = attributes['@_href'];
      if (hreflang && href) {
        entry.alternates.push({ hreflang, href });
      }
    }
  }

  if (!entry.loc) return null;
  return entry;
}
|
||||
|
||||
/**
|
||||
* Extract image data from ordered-format nodes.
|
||||
*/
|
||||
private static extractImageData(nodes: any[]): interfaces.ISitemapImage {
  const image: interfaces.ISitemapImage = { loc: '' };
  if (!Array.isArray(nodes)) return image;

  // Tag → field table, listed in the order the tags are probed on each node.
  const tagToField: Array<[string, 'loc' | 'caption' | 'title' | 'geoLocation' | 'licenseUrl']> = [
    ['image:loc', 'loc'],
    ['image:caption', 'caption'],
    ['image:title', 'title'],
    ['image:geo_location', 'geoLocation'],
    ['image:license', 'licenseUrl'],
  ];

  nodes.forEach((child) => {
    // Only the first tag that is present on the node is consumed.
    const match = tagToField.find(([tag]) => child[tag]);
    if (match) {
      const [tag, field] = match;
      image[field] = SitemapParser.extractText(child[tag]);
    }
  });

  return image;
}
|
||||
|
||||
/**
|
||||
* Extract video data from ordered-format nodes.
|
||||
*/
|
||||
private static extractVideoData(nodes: any[]): interfaces.ISitemapVideo {
  // The three fields initialised here start empty and are filled from the nodes.
  const vid: interfaces.ISitemapVideo = { thumbnailLoc: '', title: '', description: '' };
  if (!Array.isArray(nodes)) return vid;

  for (const node of nodes) {
    if (node['video:thumbnail_loc']) {
      vid.thumbnailLoc = SitemapParser.extractText(node['video:thumbnail_loc']);
    } else if (node['video:title']) {
      vid.title = SitemapParser.extractText(node['video:title']);
    } else if (node['video:description']) {
      vid.description = SitemapParser.extractText(node['video:description']);
    } else if (node['video:content_loc']) {
      vid.contentLoc = SitemapParser.extractText(node['video:content_loc']);
    } else if (node['video:player_loc']) {
      vid.playerLoc = SitemapParser.extractText(node['video:player_loc']);
    } else if (node['video:duration']) {
      // Explicit radix 10: the text is always read as a decimal number.
      // Unparsable text leaves the field unset instead of storing NaN.
      const duration = Number.parseInt(SitemapParser.extractText(node['video:duration']), 10);
      if (Number.isFinite(duration)) vid.duration = duration;
    } else if (node['video:rating']) {
      const rating = Number.parseFloat(SitemapParser.extractText(node['video:rating']));
      if (Number.isFinite(rating)) vid.rating = rating;
    } else if (node['video:view_count']) {
      const viewCount = Number.parseInt(SitemapParser.extractText(node['video:view_count']), 10);
      if (Number.isFinite(viewCount)) vid.viewCount = viewCount;
    } else if (node['video:publication_date']) {
      vid.publicationDate = SitemapParser.extractText(node['video:publication_date']);
    } else if (node['video:family_friendly']) {
      // Boolean flags are true only for the exact text 'yes'.
      vid.familyFriendly = SitemapParser.extractText(node['video:family_friendly']) === 'yes';
    } else if (node['video:live']) {
      vid.live = SitemapParser.extractText(node['video:live']) === 'yes';
    } else if (node['video:requires_subscription']) {
      vid.requiresSubscription = SitemapParser.extractText(node['video:requires_subscription']) === 'yes';
    } else if (node['video:tag']) {
      // <video:tag> may repeat; each occurrence is appended in document order.
      if (!vid.tags) vid.tags = [];
      vid.tags.push(SitemapParser.extractText(node['video:tag']));
    }
  }

  return vid;
}
|
||||
|
||||
/**
|
||||
* Extract news data from ordered-format nodes.
|
||||
*/
|
||||
private static extractNewsData(nodes: any[]): interfaces.ISitemapNews {
  // Required fields start out empty and are filled from whatever nodes exist.
  const result: interfaces.ISitemapNews = {
    publication: { name: '', language: '' },
    publicationDate: '',
    title: '',
  };
  if (!Array.isArray(nodes)) return result;

  nodes.forEach((child) => {
    if (child['news:publication']) {
      // <news:publication> is a container; its children carry name and language.
      const publicationParts = child['news:publication'];
      if (!Array.isArray(publicationParts)) return;

      publicationParts.forEach((part) => {
        if (part['news:name']) {
          result.publication.name = SitemapParser.extractText(part['news:name']);
          return;
        }
        if (part['news:language']) {
          result.publication.language = SitemapParser.extractText(part['news:language']);
        }
      });
      return;
    }

    if (child['news:publication_date']) {
      result.publicationDate = SitemapParser.extractText(child['news:publication_date']);
      return;
    }

    if (child['news:title']) {
      result.title = SitemapParser.extractText(child['news:title']);
      return;
    }

    if (child['news:keywords']) {
      result.keywords = SitemapParser.extractText(child['news:keywords']);
    }
  });

  return result;
}
|
||||
|
||||
/**
|
||||
* Extract sitemap index entries from ordered-format nodes.
|
||||
*/
|
||||
private static extractIndexEntries(indexNodes: any[]): interfaces.ISitemapIndexEntry[] {
  if (!Array.isArray(indexNodes)) return [];

  const found: interfaces.ISitemapIndexEntry[] = [];
  indexNodes.forEach((child) => {
    // Only <sitemap> children describe index entries.
    const sitemapParts = child.sitemap;
    if (!sitemapParts) return;

    const item: interfaces.ISitemapIndexEntry = { loc: '' };
    // A non-array payload contributes no fields, so the entry stays without loc.
    const parts: any[] = Array.isArray(sitemapParts) ? sitemapParts : [];
    parts.forEach((part) => {
      if (part.loc) {
        item.loc = SitemapParser.extractText(part.loc);
      } else if (part.lastmod) {
        item.lastmod = SitemapParser.extractText(part.lastmod);
      }
    });

    // Entries that never received a location are discarded.
    if (item.loc) found.push(item);
  });

  return found;
}
|
||||
|
||||
/**
|
||||
* Extract text content from an ordered-format node.
|
||||
* In ordered format, text is stored as [{ '#text': 'value' }].
|
||||
*/
|
||||
private static extractText(nodes: any): string {
  // Primitive inputs are already the text (numbers are stringified).
  switch (typeof nodes) {
    case 'string':
      return nodes;
    case 'number':
      return String(nodes);
    default:
      break;
  }

  if (!Array.isArray(nodes)) return '';

  // The first child carrying a non-null '#text' supplies the value.
  const textNode = nodes.find((candidate: any) => candidate['#text'] != null);
  return textNode === undefined ? '' : String(textNode['#text']);
}
|
||||
}
|
||||
168
ts/smartsitemap.classes.sitemapstream.ts
Normal file
168
ts/smartsitemap.classes.sitemapstream.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
|
||||
|
||||
/**
|
||||
* A Node.js Readable stream that generates sitemap XML incrementally.
|
||||
* Suitable for very large sitemaps (millions of URLs) that cannot be held in memory.
|
||||
*
|
||||
* Usage:
|
||||
* const stream = new SitemapStream();
|
||||
* stream.pipe(createWriteStream('sitemap.xml'));
|
||||
* stream.pushUrl({ loc: 'https://example.com/' });
|
||||
* stream.pushUrl({ loc: 'https://example.com/about' });
|
||||
* stream.finish();
|
||||
*/
|
||||
export class SitemapStream extends plugins.Readable {
  /**
   * Extension namespaces declared on the opening <urlset> tag.
   * The header is emitted before later URLs are known, so every prefix that a
   * later entry might use has to be declared up front to keep the XML valid.
   */
  private static readonly EXTENSION_NAMESPACES: ReadonlyArray<[string, string]> = [
    ['image', 'http://www.google.com/schemas/sitemap-image/1.1'],
    ['video', 'http://www.google.com/schemas/sitemap-video/1.1'],
    ['news', 'http://www.google.com/schemas/sitemap-news/0.9'],
    ['xhtml', 'http://www.w3.org/1999/xhtml'],
  ];

  /** Rendering options; `prettyPrint` defaults to true unless overridden. */
  private options: interfaces.ISitemapOptions;
  /** Number of URL entries rendered so far. */
  private urlCount = 0;
  /** Whether the XML declaration and the opening <urlset> tag were emitted. */
  private headerWritten = false;
  /** Set by finish(); pushUrl() rejects further entries afterwards. */
  private finished = false;

  /**
   * @param options Optional sitemap options, merged over `{ prettyPrint: true }`.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    super({ encoding: 'utf-8' });
    this.options = {
      prettyPrint: true,
      ...options,
    };
  }

  /**
   * Render one URL entry to XML and push it into the readable buffer.
   * The header is written automatically before the first entry.
   *
   * Images, news and hreflang alternates are rendered inline; `url.videos`
   * is not rendered by this simplified streaming path.
   *
   * @param url The URL entry to render.
   * @returns The result of the underlying `push()` call.
   * @throws Error if called after finish().
   */
  pushUrl(url: interfaces.ISitemapUrl): boolean {
    if (this.finished) {
      throw new Error('Cannot push URLs after calling finish()');
    }

    // Write header on first URL
    if (!this.headerWritten) {
      this.writeHeader();
    }

    const pretty = this.options.prettyPrint !== false;
    const indent = pretty ? ' ' : '';
    const nl = pretty ? '\n' : '';
    const indent2 = `${indent}${indent}`;
    const indent3 = `${indent2}${indent}`;
    const indent4 = `${indent3}${indent}`;

    let urlXml = `${indent}<url>${nl}`;
    urlXml += `${indent2}<loc>${XmlRenderer.escapeXml(url.loc)}</loc>${nl}`;

    if (url.lastmod != null) {
      urlXml += `${indent2}<lastmod>${XmlRenderer.formatDate(url.lastmod)}</lastmod>${nl}`;
    }

    // Per-URL values win over the stream-wide defaults.
    const changefreq = url.changefreq ?? this.options.defaultChangeFreq;
    if (changefreq) {
      // Escaped so that an unexpected value cannot break the document.
      urlXml += `${indent2}<changefreq>${XmlRenderer.escapeXml(changefreq)}</changefreq>${nl}`;
    }

    const priority = url.priority ?? this.options.defaultPriority;
    if (priority != null) {
      urlXml += `${indent2}<priority>${priority.toFixed(1)}</priority>${nl}`;
    }

    // Extensions (simplified inline rendering for streaming)
    if (url.images) {
      for (const img of url.images) {
        urlXml += `${indent2}<image:image>${nl}`;
        urlXml += `${indent3}<image:loc>${XmlRenderer.escapeXml(img.loc)}</image:loc>${nl}`;
        if (img.caption) {
          urlXml += `${indent3}<image:caption>${XmlRenderer.escapeXml(img.caption)}</image:caption>${nl}`;
        }
        if (img.title) {
          urlXml += `${indent3}<image:title>${XmlRenderer.escapeXml(img.title)}</image:title>${nl}`;
        }
        urlXml += `${indent2}</image:image>${nl}`;
      }
    }

    if (url.news) {
      urlXml += `${indent2}<news:news>${nl}`;
      urlXml += `${indent3}<news:publication>${nl}`;
      urlXml += `${indent4}<news:name>${XmlRenderer.escapeXml(url.news.publication.name)}</news:name>${nl}`;
      urlXml += `${indent4}<news:language>${XmlRenderer.escapeXml(url.news.publication.language)}</news:language>${nl}`;
      urlXml += `${indent3}</news:publication>${nl}`;
      urlXml += `${indent3}<news:publication_date>${XmlRenderer.formatDate(url.news.publicationDate)}</news:publication_date>${nl}`;
      urlXml += `${indent3}<news:title>${XmlRenderer.escapeXml(url.news.title)}</news:title>${nl}`;
      if (url.news.keywords) {
        // Keyword arrays are flattened into one comma-separated string.
        const kw = Array.isArray(url.news.keywords) ? url.news.keywords.join(', ') : url.news.keywords;
        urlXml += `${indent3}<news:keywords>${XmlRenderer.escapeXml(kw)}</news:keywords>${nl}`;
      }
      urlXml += `${indent2}</news:news>${nl}`;
    }

    if (url.alternates) {
      for (const alt of url.alternates) {
        // Both attribute values are escaped; a raw quote would end the attribute.
        urlXml += `${indent2}<xhtml:link rel="alternate" hreflang="${XmlRenderer.escapeXml(alt.hreflang)}" href="${XmlRenderer.escapeXml(alt.href)}"/>${nl}`;
      }
    }

    urlXml += `${indent}</url>${nl}`;

    this.urlCount++;
    return this.push(urlXml);
  }

  /**
   * Signal that no more URLs will be added.
   * Writes the header if no URL was ever pushed (empty sitemap), then the
   * closing tag, and ends the stream. Repeated calls are ignored.
   */
  finish(): void {
    if (this.finished) return;
    this.finished = true;

    if (!this.headerWritten) {
      // Empty sitemap
      this.writeHeader();
    }

    this.push('</urlset>\n');
    this.push(null); // signal end of stream
  }

  /** Get the number of URLs written so far */
  get count(): number {
    return this.urlCount;
  }

  // Required by Readable
  _read(): void {
    // Data is pushed via pushUrl(), not pulled
  }

  /**
   * Write the XML declaration, the optional stylesheet instruction and the
   * opening <urlset> tag with the core and all extension namespaces.
   */
  private writeHeader(): void {
    this.headerWritten = true;
    const nl = this.options.prettyPrint !== false ? '\n' : '';

    let header = `<?xml version="1.0" encoding="UTF-8"?>${nl}`;

    if (this.options.xslUrl) {
      header += `<?xml-stylesheet type="text/xsl" href="${XmlRenderer.escapeXml(this.options.xslUrl)}"?>${nl}`;
    }

    header += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"';
    for (const [prefix, uri] of SitemapStream.EXTENSION_NAMESPACES) {
      header += `${nl} xmlns:${prefix}="${uri}"`;
    }
    header += `>${nl}`;

    this.push(header);
  }
}
|
||||
@@ -1,47 +0,0 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
|
||||
export type TUpdateFrequency =
|
||||
| 'never'
|
||||
| 'daily'
|
||||
| 'weekly'
|
||||
| 'monthly'
|
||||
| 'yearly';
|
||||
|
||||
export interface IUrlInfo {
|
||||
url: string;
|
||||
timestamp: number;
|
||||
frequency?: TUpdateFrequency;
|
||||
}
|
||||
|
||||
export class SitemapWebsite {
  // URL records in the order they were added.
  urlInfos: IUrlInfo[] = [];
  constructor() {}

  /** Append one URL record to the sitemap. */
  public addUrl(urlInfoArg: IUrlInfo) {
    this.urlInfos.push(urlInfoArg);
  }

  /**
   * Serialise the collected URLs as a <urlset> sitemap XML string.
   * Entries without a frequency are emitted with 'weekly'.
   */
  public exportSitemapXml() {
    const urlEntries = this.urlInfos.map(
      ({ url, timestamp, frequency }): { loc: string; lastmod: string; changefreq: TUpdateFrequency } => ({
        loc: url,
        lastmod: new Date(timestamp).toISOString(),
        changefreq: frequency || 'weekly',
      }),
    );

    const sitemapObject: any = {
      urlset: {
        '@_xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        url: urlEntries,
      },
    };

    const xmlBuilder = new plugins.smartxml.SmartXml();
    return xmlBuilder.createXmlFromObject(sitemapObject);
  }
}
|
||||
@@ -1,92 +1,112 @@
|
||||
import { SitemapNews } from './smartsitemap.classes.sitemapnews.js';
|
||||
import {
|
||||
type IUrlInfo,
|
||||
SitemapWebsite,
|
||||
} from './smartsitemap.classes.sitemapwebsite.js';
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import * as interfaces from './interfaces/index.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
import { UrlsetBuilder } from './smartsitemap.classes.urlsetbuilder.js';
|
||||
import { NewsSitemapBuilder } from './smartsitemap.classes.newsbuilder.js';
|
||||
import { SitemapIndexBuilder } from './smartsitemap.classes.indexbuilder.js';
|
||||
import { SitemapParser } from './smartsitemap.classes.sitemapparser.js';
|
||||
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
|
||||
import { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
|
||||
import { SitemapValidator } from './smartsitemap.classes.validator.js';
|
||||
|
||||
/**
|
||||
* Main entry point for @push.rocks/smartsitemap.
|
||||
* Provides static factory methods for creating, parsing, and validating sitemaps.
|
||||
*
|
||||
* @example Simple sitemap
|
||||
* ```typescript
|
||||
* const xml = SmartSitemap.create()
|
||||
* .addUrl('https://example.com/')
|
||||
* .addUrl('https://example.com/about')
|
||||
* .toXml();
|
||||
* ```
|
||||
*
|
||||
* @example News sitemap from RSS feed
|
||||
* ```typescript
|
||||
* const builder = SmartSitemap.createNews({ publicationName: 'My Pub' });
|
||||
* await builder.importFromFeedUrl('https://example.com/rss/');
|
||||
* const xml = builder.toXml();
|
||||
* ```
|
||||
*/
|
||||
export class SmartSitemap {
|
||||
constructor() {}
|
||||
// ──────────────────────────────────────────────
|
||||
// Static Factory Methods
|
||||
// ──────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* creates a sitemap for news from feedurl
|
||||
*/
|
||||
public async createSitemapNewsFromFeedUrl(
|
||||
feedUrlArg: string,
|
||||
): Promise<string> {
|
||||
const sitemapNewsInstance = new SitemapNews({});
|
||||
await sitemapNewsInstance.readAndAddFromRssFeedUrl(feedUrlArg);
|
||||
return sitemapNewsInstance.exportSitemapXml();
|
||||
/** Create a standard sitemap builder */
|
||||
static create(options?: interfaces.ISitemapOptions): UrlsetBuilder {
|
||||
return new UrlsetBuilder(options);
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a sitemap for news from feedxmlstring
|
||||
*/
|
||||
public async createSitemapNewsFromAFeedStringArg(
|
||||
feedStringArg: string,
|
||||
): Promise<string> {
|
||||
const sitemapNewsInstance = new SitemapNews({});
|
||||
await sitemapNewsInstance.readAndAddFromRssFeedString(feedStringArg);
|
||||
return sitemapNewsInstance.exportSitemapXml();
|
||||
/** Create a news sitemap builder */
|
||||
static createNews(options: interfaces.INewsSitemapOptions): NewsSitemapBuilder {
|
||||
return new NewsSitemapBuilder(options);
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a sitemap for news from an array of articles
|
||||
*/
|
||||
public async createSitemapNewsFromArticleArray(
|
||||
articleArrayArg: plugins.tsclass.content.IArticle[],
|
||||
): Promise<string> {
|
||||
const sitemapNewsInstance = new SitemapNews({});
|
||||
await sitemapNewsInstance.readAndParseArticles(articleArrayArg);
|
||||
return sitemapNewsInstance.exportSitemapXml();
|
||||
/** Create a sitemap index builder */
|
||||
static createIndex(options?: interfaces.ISitemapOptions): SitemapIndexBuilder {
|
||||
return new SitemapIndexBuilder(options);
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a normal sitemap from a list of urls
|
||||
*/
|
||||
public async createSitemapFromYmlString(yamlString: string): Promise<string> {
|
||||
const yamlObject: interfaces.ISitemapYaml =
|
||||
await plugins.smartyaml.yamlStringToObject(yamlString);
|
||||
const sitemapWebsite = new SitemapWebsite();
|
||||
for (const urlArg of yamlObject.daily) {
|
||||
sitemapWebsite.addUrl({
|
||||
url: urlArg,
|
||||
timestamp: Date.now() - 10000,
|
||||
frequency: 'daily',
|
||||
});
|
||||
}
|
||||
return sitemapWebsite.exportSitemapXml();
|
||||
/** Parse a sitemap XML string into structured data */
|
||||
static async parse(xml: string): Promise<interfaces.IParsedSitemap> {
|
||||
return SitemapParser.parse(xml);
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a normal sitemap from a list of urls
|
||||
*/
|
||||
public async createSitemapFromUrlInfoArray(urlInfosArg: IUrlInfo[]) {
|
||||
const sitemapWebsite = new SitemapWebsite();
|
||||
for (const urlInfo of urlInfosArg) {
|
||||
sitemapWebsite.addUrl(urlInfo);
|
||||
}
|
||||
return sitemapWebsite.exportSitemapXml();
|
||||
/** Fetch and parse a sitemap from a URL */
|
||||
static async parseUrl(url: string): Promise<interfaces.IParsedSitemap> {
|
||||
return SitemapParser.parseUrl(url);
|
||||
}
|
||||
|
||||
/**
|
||||
* parses a sitemap url
|
||||
*/
|
||||
public async parseSitemapUrl(urlArg: string) {
|
||||
const response = await plugins.webrequest.webrequest(urlArg);
|
||||
const sitemapXml = await response.text();
|
||||
|
||||
const parsedSitemap = await this.parseSitemap(sitemapXml);
|
||||
return parsedSitemap;
|
||||
/** Create a UrlsetBuilder populated from an RSS/Atom feed URL */
|
||||
static async fromFeedUrl(
|
||||
feedUrl: string,
|
||||
options?: interfaces.IFeedImportOptions,
|
||||
): Promise<UrlsetBuilder> {
|
||||
const urls = await FeedImporter.fromUrl(feedUrl, options);
|
||||
const builder = new UrlsetBuilder();
|
||||
builder.addUrls(urls);
|
||||
return builder;
|
||||
}
|
||||
|
||||
/**
|
||||
* parses a sitemap
|
||||
*/
|
||||
public async parseSitemap(
|
||||
sitemapXmlArg: string,
|
||||
): Promise<interfaces.IParsedSiteMap> {
|
||||
return new plugins.smartxml.SmartXml().parseXmlToObject(sitemapXmlArg);
|
||||
/** Create a UrlsetBuilder populated from an RSS/Atom feed string */
|
||||
static async fromFeedString(
|
||||
feedXml: string,
|
||||
options?: interfaces.IFeedImportOptions,
|
||||
): Promise<UrlsetBuilder> {
|
||||
const urls = await FeedImporter.fromString(feedXml, options);
|
||||
const builder = new UrlsetBuilder();
|
||||
builder.addUrls(urls);
|
||||
return builder;
|
||||
}
|
||||
|
||||
/** Create a UrlsetBuilder populated from a YAML config string */
|
||||
static async fromYaml(yamlString: string): Promise<UrlsetBuilder> {
|
||||
const urls = await YamlImporter.parseConfig(yamlString);
|
||||
const builder = new UrlsetBuilder();
|
||||
builder.addUrls(urls);
|
||||
return builder;
|
||||
}
|
||||
|
||||
/** Create a NewsSitemapBuilder populated from @tsclass/tsclass IArticle array */
|
||||
static fromArticles(
|
||||
articles: plugins.tsclass.content.IArticle[],
|
||||
options: interfaces.INewsSitemapOptions,
|
||||
): NewsSitemapBuilder {
|
||||
const builder = new NewsSitemapBuilder(options);
|
||||
builder.importFromArticles(articles);
|
||||
return builder;
|
||||
}
|
||||
|
||||
/** Create a UrlsetBuilder from a simple URL string array */
|
||||
static fromUrls(urls: string[], options?: interfaces.ISitemapOptions): UrlsetBuilder {
|
||||
const builder = new UrlsetBuilder(options);
|
||||
builder.addFromArray(urls);
|
||||
return builder;
|
||||
}
|
||||
|
||||
/** Validate a sitemap XML string */
|
||||
static async validate(xml: string): Promise<interfaces.IValidationResult> {
|
||||
const parsed = await SitemapParser.parse(xml);
|
||||
return SitemapValidator.validateUrlset(parsed.urls);
|
||||
}
|
||||
}
|
||||
|
||||
274
ts/smartsitemap.classes.urlsetbuilder.ts
Normal file
274
ts/smartsitemap.classes.urlsetbuilder.ts
Normal file
@@ -0,0 +1,274 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
import type * as interfaces from './interfaces/index.js';
import { XmlRenderer } from './smartsitemap.classes.xmlrenderer.js';
import { SitemapValidator } from './smartsitemap.classes.validator.js';
import { FeedImporter } from './smartsitemap.classes.feedimporter.js';
import { YamlImporter } from './smartsitemap.classes.yamlimporter.js';
import { SitemapStream } from './smartsitemap.classes.sitemapstream.js';
|
||||
|
||||
/**
|
||||
* Chainable builder for creating standard XML sitemaps (<urlset>).
|
||||
* Every mutating method returns `this` for fluent chaining.
|
||||
*
|
||||
* Supports all sitemap extensions (images, videos, news, hreflang),
|
||||
* auto-splitting at 50K URLs, multiple output formats, and validation.
|
||||
*/
|
||||
export class UrlsetBuilder {
  /** URL entries collected so far, in insertion order unless sorted. */
  protected urls: interfaces.ISitemapUrl[] = [];
  /** Builder options: constructor defaults merged with caller overrides. */
  protected options: interfaces.ISitemapOptions;

  /**
   * @param options Optional overrides for the defaults
   *   `{ prettyPrint: true, maxUrlsPerSitemap: 50000, validate: true }`.
   */
  constructor(options?: interfaces.ISitemapOptions) {
    this.options = {
      prettyPrint: true,
      maxUrlsPerSitemap: 50000,
      validate: true,
      ...options,
    };
  }

  // ──────────────────────────────────────────────
  // Adding URLs
  // ──────────────────────────────────────────────

  /** Add a single URL with full options */
  add(url: interfaces.ISitemapUrl): this {
    this.urls.push(url);
    return this;
  }

  /** Add a URL by loc string, optionally with lastmod */
  addUrl(loc: string, lastmod?: Date | string | number): this {
    const url: interfaces.ISitemapUrl = { loc };
    // The lastmod key is only set when a value was actually supplied.
    if (lastmod != null) {
      url.lastmod = lastmod;
    }
    this.urls.push(url);
    return this;
  }

  /** Add multiple URL objects */
  addUrls(urls: interfaces.ISitemapUrl[]): this {
    this.urls.push(...urls);
    return this;
  }

  /** Add URLs from a plain string array */
  addFromArray(locs: string[]): this {
    for (const loc of locs) {
      this.urls.push({ loc });
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Bulk operations
  // ──────────────────────────────────────────────

  /** Merge all URLs from another UrlsetBuilder */
  merge(other: UrlsetBuilder): this {
    this.urls.push(...other.getUrls());
    return this;
  }

  /** Filter URLs by predicate (in-place) */
  filter(predicate: (url: interfaces.ISitemapUrl) => boolean): this {
    this.urls = this.urls.filter(predicate);
    return this;
  }

  /** Transform URLs (in-place) */
  map(transform: (url: interfaces.ISitemapUrl) => interfaces.ISitemapUrl): this {
    this.urls = this.urls.map(transform);
    return this;
  }

  /** Sort URLs (in-place); without a comparator, entries are ordered by loc */
  sort(compareFn?: (a: interfaces.ISitemapUrl, b: interfaces.ISitemapUrl) => number): this {
    this.urls.sort(compareFn ?? ((a, b) => a.loc.localeCompare(b.loc)));
    return this;
  }

  /** Remove duplicate URLs by loc, keeping the first occurrence */
  dedupe(): this {
    const seen = new Set<string>();
    this.urls = this.urls.filter((url) => {
      if (seen.has(url.loc)) return false;
      seen.add(url.loc);
      return true;
    });
    return this;
  }

  // ──────────────────────────────────────────────
  // Defaults
  // ──────────────────────────────────────────────

  /** Set default changefreq for URLs that don't specify one */
  setDefaultChangeFreq(freq: interfaces.TChangeFreq): this {
    this.options.defaultChangeFreq = freq;
    return this;
  }

  /** Set default priority for URLs that don't specify one */
  setDefaultPriority(priority: number): this {
    this.options.defaultPriority = priority;
    return this;
  }

  /** Set XSL stylesheet URL for browser rendering */
  setXslUrl(url: string): this {
    this.options.xslUrl = url;
    return this;
  }

  // ──────────────────────────────────────────────
  // Import sources (async, return Promise<this>)
  // ──────────────────────────────────────────────

  /** Import URLs from an RSS/Atom feed URL */
  async importFromFeedUrl(feedUrl: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const imported = await FeedImporter.fromUrl(feedUrl, options);
    this.urls.push(...imported);
    return this;
  }

  /** Import URLs from an RSS/Atom feed XML string */
  async importFromFeedString(feedXml: string, options?: interfaces.IFeedImportOptions): Promise<this> {
    const imported = await FeedImporter.fromString(feedXml, options);
    this.urls.push(...imported);
    return this;
  }

  /** Import URLs from a YAML config string */
  async importFromYaml(yamlString: string): Promise<this> {
    const imported = await YamlImporter.parseConfig(yamlString);
    this.urls.push(...imported);
    return this;
  }

  /** Import from @tsclass/tsclass IArticle array */
  importFromArticles(articles: plugins.tsclass.content.IArticle[]): this {
    for (const article of articles) {
      const url: interfaces.ISitemapUrl = {
        loc: article.url,
        // A falsy timestamp yields no lastmod.
        lastmod: article.timestamp ? new Date(article.timestamp) : undefined,
      };
      this.urls.push(url);
    }
    return this;
  }

  // ──────────────────────────────────────────────
  // Output
  // ──────────────────────────────────────────────

  /** Export as sitemap XML string */
  toXml(): string {
    return XmlRenderer.renderUrlset(this.urls, this.options);
  }

  /** Export as plain text (one URL per line) */
  toTxt(): string {
    return XmlRenderer.renderTxt(this.urls);
  }

  /** Export as JSON string */
  toJson(): string {
    return XmlRenderer.renderJson(this.urls);
  }

  /** Export as gzipped XML buffer */
  async toGzipBuffer(): Promise<Buffer> {
    const xml = this.toXml();
    const gzip = plugins.promisify(plugins.zlib.gzip);
    return gzip(Buffer.from(xml, 'utf-8')) as Promise<Buffer>;
  }

  /**
   * Export with automatic index splitting.
   * If URL count exceeds maxUrlsPerSitemap, returns a sitemap index
   * plus individual sitemap chunks named `sitemap-1.xml`, `sitemap-2.xml`, …
   *
   * The chunk size is capped at 50000. A missing, non-finite or < 1
   * `maxUrlsPerSitemap` falls back to 50000, because such a value would
   * otherwise stall the chunking loop or drop URLs.
   */
  toSitemapSet(): interfaces.ISitemapSet {
    const hardLimit = 50000;
    const requested = this.options.maxUrlsPerSitemap;
    const maxUrls =
      typeof requested === 'number' && Number.isFinite(requested) && requested >= 1
        ? Math.min(Math.floor(requested), hardLimit)
        : hardLimit;

    if (this.urls.length <= maxUrls) {
      return {
        needsIndex: false,
        indexXml: null,
        sitemaps: [{ filename: 'sitemap.xml', xml: this.toXml() }],
      };
    }

    // Split into chunks
    const chunks: interfaces.ISitemapUrl[][] = [];
    for (let i = 0; i < this.urls.length; i += maxUrls) {
      chunks.push(this.urls.slice(i, i + maxUrls));
    }

    const baseUrl = this.options.baseUrl || '';
    const sitemaps: Array<{ filename: string; xml: string }> = [];
    const indexEntries: Array<{ loc: string; lastmod?: string }> = [];

    for (let i = 0; i < chunks.length; i++) {
      const filename = `sitemap-${i + 1}.xml`;
      const xml = XmlRenderer.renderUrlset(chunks[i], this.options);
      sitemaps.push({ filename, xml });
      indexEntries.push({
        // A single trailing slash on baseUrl is dropped before joining.
        loc: baseUrl ? `${baseUrl.replace(/\/$/, '')}/${filename}` : filename,
      });
    }

    const indexXml = XmlRenderer.renderIndex(indexEntries, this.options);

    return {
      needsIndex: true,
      indexXml,
      sitemaps,
    };
  }

  /**
   * Create a Node.js Readable stream for large sitemaps.
   * The stream is returned immediately and fed on the next tick, so callers
   * can attach listeners or pipe it first. A failure while feeding is
   * reported through the stream (destroy with the error) instead of being
   * thrown from the deferred callback.
   */
  toStream(): SitemapStream {
    const stream = new SitemapStream(this.options);
    // Push all URLs into the stream asynchronously
    process.nextTick(() => {
      try {
        for (const url of this.urls) {
          stream.pushUrl(url);
        }
        stream.finish();
      } catch (err) {
        stream.destroy(err instanceof Error ? err : new Error(String(err)));
      }
    });
    return stream;
  }

  // ──────────────────────────────────────────────
  // Inspection
  // ──────────────────────────────────────────────

  /** Get a shallow copy of the URL array */
  getUrls(): interfaces.ISitemapUrl[] {
    return [...this.urls];
  }

  /** Get the number of URLs */
  get count(): number {
    return this.urls.length;
  }

  /** Validate this sitemap against the protocol specification */
  validate(): interfaces.IValidationResult {
    return SitemapValidator.validateUrlset(this.urls, this.options);
  }

  /** Get statistics about this sitemap */
  stats(): interfaces.ISitemapStats {
    return SitemapValidator.computeStats(this.urls, this.options);
  }

  /** Get a shallow copy of the options for this builder */
  getOptions(): interfaces.ISitemapOptions {
    return { ...this.options };
  }
}
|
||||
289
ts/smartsitemap.classes.validator.ts
Normal file
289
ts/smartsitemap.classes.validator.ts
Normal file
@@ -0,0 +1,289 @@
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
|
||||
/** Values accepted for a URL's `changefreq` field. */
const VALID_CHANGEFREQS: interfaces.TChangeFreq[] = [
  'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never',
];

/** Maximum length of a `loc` string, in characters. */
const MAX_URL_LENGTH = 2048;
/** Hard upper bound on URLs in one sitemap; configured limits are clamped to it. */
const MAX_URLS_PER_SITEMAP = 50000;
/** Upper bound on the estimated size of one sitemap: 50 MB. */
const MAX_SITEMAP_SIZE_BYTES = 52_428_800;
/** Maximum number of image entries on a single URL. */
const MAX_IMAGES_PER_URL = 1000;
/** Maximum number of tags on a single video. */
const MAX_VIDEO_TAGS = 32;
/** Maximum video duration, in seconds. */
const MAX_VIDEO_DURATION = 28800;
/** Maximum length of a video description, in characters. */
const MAX_VIDEO_DESCRIPTION_LENGTH = 2048;

/**
 * Validates sitemap URLs and fields against the sitemap protocol specification.
 *
 * All methods are static and side-effect free: they inspect the entries they
 * are given and return error / warning / statistics objects.
 */
export class SitemapValidator {
  /**
   * Validate a single URL entry.
   *
   * Checks `loc`, `priority`, `changefreq`, `lastmod` and the image, video,
   * news and alternate (hreflang) extensions.
   *
   * @param url - The entry to check.
   * @returns One error per violated rule; an empty array when the entry is valid.
   */
  static validateUrl(url: interfaces.ISitemapUrl): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];

    // loc is required
    if (!url.loc) {
      errors.push({ field: 'loc', message: 'URL loc is required', url: url.loc });
    } else {
      errors.push(...SitemapValidator.validateUrlString(url.loc));
    }

    // priority range (NaN is rejected as well, see isWithin)
    if (url.priority != null && !SitemapValidator.isWithin(url.priority, 0, 1)) {
      errors.push({
        field: 'priority',
        message: 'Priority must be between 0.0 and 1.0',
        url: url.loc,
      });
    }

    // changefreq
    if (url.changefreq && !VALID_CHANGEFREQS.includes(url.changefreq)) {
      errors.push({
        field: 'changefreq',
        message: `Invalid changefreq "${url.changefreq}". Must be one of: ${VALID_CHANGEFREQS.join(', ')}`,
        url: url.loc,
      });
    }

    // lastmod date validation
    if (url.lastmod != null) {
      const date = url.lastmod instanceof Date ? url.lastmod : new Date(url.lastmod as any);
      if (isNaN(date.getTime())) {
        errors.push({
          field: 'lastmod',
          message: `Invalid lastmod date: "${url.lastmod}"`,
          url: url.loc,
        });
      }
    }

    // Images
    if (url.images) {
      if (url.images.length > MAX_IMAGES_PER_URL) {
        errors.push({
          field: 'images',
          message: `Maximum ${MAX_IMAGES_PER_URL} images per URL, got ${url.images.length}`,
          url: url.loc,
        });
      }
      for (const img of url.images) {
        if (!img.loc) {
          errors.push({ field: 'image:loc', message: 'Image loc is required', url: url.loc });
        }
      }
    }

    // Videos
    if (url.videos) {
      for (const vid of url.videos) {
        if (!vid.thumbnailLoc) {
          errors.push({ field: 'video:thumbnail_loc', message: 'Video thumbnail_loc is required', url: url.loc });
        }
        if (!vid.title) {
          errors.push({ field: 'video:title', message: 'Video title is required', url: url.loc });
        }
        if (!vid.description) {
          errors.push({ field: 'video:description', message: 'Video description is required', url: url.loc });
        }
        if (vid.description && vid.description.length > MAX_VIDEO_DESCRIPTION_LENGTH) {
          errors.push({
            field: 'video:description',
            message: `Video description exceeds ${MAX_VIDEO_DESCRIPTION_LENGTH} chars`,
            url: url.loc,
          });
        }
        if (!vid.contentLoc && !vid.playerLoc) {
          errors.push({
            field: 'video:content_loc',
            message: 'Video must have at least one of contentLoc or playerLoc',
            url: url.loc,
          });
        }
        if (vid.duration != null && !SitemapValidator.isWithin(vid.duration, 1, MAX_VIDEO_DURATION)) {
          errors.push({
            field: 'video:duration',
            message: `Video duration must be 1–${MAX_VIDEO_DURATION} seconds`,
            url: url.loc,
          });
        }
        if (vid.rating != null && !SitemapValidator.isWithin(vid.rating, 0, 5)) {
          errors.push({
            field: 'video:rating',
            message: 'Video rating must be 0.0–5.0',
            url: url.loc,
          });
        }
        if (vid.tags && vid.tags.length > MAX_VIDEO_TAGS) {
          errors.push({
            field: 'video:tag',
            message: `Maximum ${MAX_VIDEO_TAGS} video tags, got ${vid.tags.length}`,
            url: url.loc,
          });
        }
      }
    }

    // News
    if (url.news) {
      if (!url.news.publication?.name) {
        errors.push({ field: 'news:publication:name', message: 'News publication name is required', url: url.loc });
      }
      if (!url.news.publication?.language) {
        errors.push({ field: 'news:publication:language', message: 'News publication language is required', url: url.loc });
      }
      if (!url.news.title) {
        errors.push({ field: 'news:title', message: 'News title is required', url: url.loc });
      }
      if (url.news.publicationDate == null) {
        errors.push({ field: 'news:publication_date', message: 'News publication date is required', url: url.loc });
      }
    }

    // Alternates
    if (url.alternates) {
      for (const alt of url.alternates) {
        if (!alt.hreflang) {
          errors.push({ field: 'xhtml:link:hreflang', message: 'Alternate hreflang is required', url: url.loc });
        }
        if (!alt.href) {
          errors.push({ field: 'xhtml:link:href', message: 'Alternate href is required', url: url.loc });
        }
      }
    }

    return errors;
  }

  /**
   * Validate an entire URL array.
   *
   * Per-entry problems are reported as errors. Duplicate `loc` values and a
   * URL count above the effective per-sitemap limit are reported as warnings
   * and do not affect `valid`.
   *
   * @param urls - Entries to validate.
   * @param options - Optional sitemap options; `maxUrlsPerSitemap` lowers the
   *   URL-count limit (values above the built-in maximum are clamped to it).
   * @returns `valid` (true when there are no errors), the errors, the
   *   warnings, and the statistics from {@link SitemapValidator.computeStats}.
   */
  static validateUrlset(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): interfaces.IValidationResult {
    const errors: interfaces.IValidationError[] = [];
    const warnings: interfaces.IValidationWarning[] = [];

    for (const url of urls) {
      errors.push(...SitemapValidator.validateUrl(url));
    }

    // Check for duplicates
    const locs = new Set<string>();
    for (const url of urls) {
      if (locs.has(url.loc)) {
        warnings.push({
          field: 'loc',
          message: `Duplicate URL: "${url.loc}"`,
          url: url.loc,
        });
      }
      locs.add(url.loc);
    }

    // Same clamped limit as computeStats and checkSizeLimits, so the three
    // never disagree about whether a set is too large.
    const maxUrls = SitemapValidator.resolveMaxUrls(options);

    // Size limit warnings
    if (urls.length > maxUrls) {
      warnings.push({
        field: 'urlset',
        message: `URL count (${urls.length}) exceeds maximum of ${maxUrls} per sitemap. Use toSitemapSet() for auto-splitting.`,
      });
    }

    const stats = SitemapValidator.computeStats(urls, options);

    return {
      valid: errors.length === 0,
      errors,
      warnings,
      stats,
    };
  }

  /**
   * Validate a URL string for proper format.
   *
   * @param url - The candidate `loc` value.
   * @returns An error when the string is longer than the maximum URL length
   *   and/or when the `URL` constructor cannot parse it; otherwise an empty array.
   */
  static validateUrlString(url: string): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];

    if (url.length > MAX_URL_LENGTH) {
      errors.push({
        field: 'loc',
        message: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,
        url,
      });
    }

    try {
      // Constructed only for its parse check; the instance itself is not needed.
      new URL(url);
    } catch {
      errors.push({
        field: 'loc',
        message: `Invalid URL: "${url}"`,
        url,
      });
    }

    return errors;
  }

  /**
   * Compute statistics for a set of URLs.
   *
   * @param urls - Entries to summarise.
   * @param options - Optional sitemap options; `maxUrlsPerSitemap` lowers the
   *   threshold used for `needsIndex` (values above the built-in maximum are
   *   clamped to it).
   * @returns Counts of URLs, images, videos, news entries and alternates, a
   *   rough size estimate in bytes, and whether the set exceeds the
   *   per-sitemap URL limit.
   */
  static computeStats(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): interfaces.ISitemapStats {
    let imageCount = 0;
    let videoCount = 0;
    let newsCount = 0;
    let alternateCount = 0;

    for (const url of urls) {
      if (url.images) imageCount += url.images.length;
      if (url.videos) videoCount += url.videos.length;
      if (url.news) newsCount++;
      if (url.alternates) alternateCount += url.alternates.length;
    }

    // Rough estimate: ~200 bytes per basic URL entry, more for extensions
    const estimatedSizeBytes =
      200 + // XML header + urlset tags
      urls.length * 200 + // base URL entries
      imageCount * 150 +
      videoCount * 400 +
      newsCount * 300 +
      alternateCount * 100;

    const maxUrls = SitemapValidator.resolveMaxUrls(options);

    return {
      urlCount: urls.length,
      imageCount,
      videoCount,
      newsCount,
      alternateCount,
      estimatedSizeBytes,
      needsIndex: urls.length > maxUrls,
    };
  }

  /**
   * Check size limits for a URL set.
   *
   * @param urls - Entries to check.
   * @param options - Optional sitemap options; `maxUrlsPerSitemap` lowers the
   *   URL-count limit (values above the built-in maximum are clamped to it).
   * @returns The effective limits, the measured URL count and estimated size,
   *   and `withinLimits`, which is true only when both are within bounds.
   */
  static checkSizeLimits(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): {
    withinLimits: boolean;
    urlCount: number;
    maxUrls: number;
    estimatedSizeBytes: number;
    maxSizeBytes: number;
  } {
    const maxUrls = SitemapValidator.resolveMaxUrls(options);
    const stats = SitemapValidator.computeStats(urls, options);

    return {
      withinLimits: urls.length <= maxUrls && stats.estimatedSizeBytes <= MAX_SITEMAP_SIZE_BYTES,
      urlCount: urls.length,
      maxUrls,
      estimatedSizeBytes: stats.estimatedSizeBytes,
      maxSizeBytes: MAX_SITEMAP_SIZE_BYTES,
    };
  }

  /**
   * Effective per-sitemap URL limit: the configured `maxUrlsPerSitemap`,
   * never larger than the built-in maximum.
   */
  private static resolveMaxUrls(options?: interfaces.ISitemapOptions): number {
    return Math.min(options?.maxUrlsPerSitemap ?? MAX_URLS_PER_SITEMAP, MAX_URLS_PER_SITEMAP);
  }

  /**
   * True when `min <= value <= max`. Written as a positive comparison so that
   * NaN, which fails every comparison, counts as out of range.
   */
  private static isWithin(value: number, min: number, max: number): boolean {
    return value >= min && value <= max;
  }
}
|
||||
294
ts/smartsitemap.classes.xmlrenderer.ts
Normal file
294
ts/smartsitemap.classes.xmlrenderer.ts
Normal file
@@ -0,0 +1,294 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
|
||||
// Sitemap XML namespace constants
|
||||
const NS_SITEMAP = 'http://www.sitemaps.org/schemas/sitemap/0.9';
const NS_IMAGE = 'http://www.google.com/schemas/sitemap-image/1.1';
const NS_VIDEO = 'http://www.google.com/schemas/sitemap-video/1.1';
const NS_NEWS = 'http://www.google.com/schemas/sitemap-news/0.9';
const NS_XHTML = 'http://www.w3.org/1999/xhtml';

/**
 * Handles all XML generation for sitemaps.
 * Supports proper escaping, namespace detection, date formatting,
 * XSL stylesheet references, and pretty printing.
 *
 * Element objects use the `@_` key prefix for attributes and are serialised
 * through `plugins.smartxml.SmartXml#createXmlFromObject`.
 */
export class XmlRenderer {
  /**
   * Escape a string for use in XML content.
   * Handles the 5 XML special characters by replacing each with its
   * predefined entity reference.
   *
   * @param str - Raw text.
   * @returns The text with `&`, `<`, `>`, `"` and `'` replaced.
   */
  static escapeXml(str: string): string {
    // `&` must be replaced first, otherwise the ampersands introduced by the
    // later replacements would themselves be escaped again.
    return str
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
  }

  /**
   * Format a date value (Date, ISO string, or Unix timestamp in ms)
   * to W3C Datetime format suitable for sitemaps.
   *
   * @param date - Value to format.
   * @returns The `toISOString()` form of the date. A string that cannot be
   *   parsed is returned unchanged.
   * @throws RangeError from `toISOString()` when given an invalid `Date`
   *   instance or a number that is not a valid timestamp.
   */
  static formatDate(date: Date | string | number): string {
    if (date instanceof Date) {
      return date.toISOString();
    }
    if (typeof date === 'number') {
      return new Date(date).toISOString();
    }
    // Already a string — validate it parses
    const parsed = new Date(date);
    if (isNaN(parsed.getTime())) {
      return date; // Return as-is if unparseable
    }
    return parsed.toISOString();
  }

  /**
   * Detect which XML namespaces are needed based on URL entries.
   *
   * @param urls - Entries that will be rendered.
   * @returns Attribute map for the `<urlset>` element: always the base
   *   sitemap namespace, plus the image / video / news / xhtml namespace when
   *   at least one entry carries the corresponding extension data.
   */
  static detectNamespaces(urls: interfaces.ISitemapUrl[]): Record<string, string> {
    const ns: Record<string, string> = {
      '@_xmlns': NS_SITEMAP,
    };

    for (const url of urls) {
      if (url.images && url.images.length > 0) {
        ns['@_xmlns:image'] = NS_IMAGE;
      }
      if (url.videos && url.videos.length > 0) {
        ns['@_xmlns:video'] = NS_VIDEO;
      }
      if (url.news) {
        ns['@_xmlns:news'] = NS_NEWS;
      }
      if (url.alternates && url.alternates.length > 0) {
        ns['@_xmlns:xhtml'] = NS_XHTML;
      }
    }

    return ns;
  }

  /**
   * Render a URL array to sitemap XML string.
   *
   * @param urls - Entries to render as `<url>` elements.
   * @param options - Optional defaults for `changefreq` / `priority` and an
   *   optional `xslUrl` for a stylesheet processing instruction.
   * @returns The serialised `<urlset>` document.
   */
  static renderUrlset(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): string {
    const namespaces = XmlRenderer.detectNamespaces(urls);
    const urlElements = urls.map((url) => XmlRenderer.buildUrlElement(url, options));

    const xmlObj: any = {
      urlset: {
        ...namespaces,
        url: urlElements,
      },
    };

    const smartXml = new plugins.smartxml.SmartXml();
    let xml = smartXml.createXmlFromObject(xmlObj);

    // Insert XSL stylesheet processing instruction if specified
    if (options?.xslUrl) {
      xml = XmlRenderer.insertXslInstruction(xml, options.xslUrl);
    }

    return xml;
  }

  /**
   * Render a sitemap index XML string.
   *
   * @param entries - Child sitemaps to list as `<sitemap>` elements.
   * @param options - Optional `xslUrl` for a stylesheet processing instruction.
   * @returns The serialised `<sitemapindex>` document.
   */
  static renderIndex(entries: interfaces.ISitemapIndexEntry[], options?: interfaces.ISitemapOptions): string {
    const sitemapElements = entries.map((entry) => {
      const el: any = {
        loc: XmlRenderer.escapeXml(entry.loc),
      };
      if (entry.lastmod != null) {
        el.lastmod = XmlRenderer.formatDate(entry.lastmod);
      }
      return el;
    });

    const xmlObj: any = {
      sitemapindex: {
        '@_xmlns': NS_SITEMAP,
        sitemap: sitemapElements,
      },
    };

    const smartXml = new plugins.smartxml.SmartXml();
    let xml = smartXml.createXmlFromObject(xmlObj);

    if (options?.xslUrl) {
      xml = XmlRenderer.insertXslInstruction(xml, options.xslUrl);
    }

    return xml;
  }

  /**
   * Render URLs as plain text (one URL per line).
   *
   * @param urls - Entries whose `loc` values are emitted.
   * @returns The `loc` values joined with `\n`, without a trailing newline.
   */
  static renderTxt(urls: interfaces.ISitemapUrl[]): string {
    return urls.map((u) => u.loc).join('\n');
  }

  /**
   * Render URLs as JSON.
   *
   * @param urls - Entries to serialise.
   * @returns `JSON.stringify` output of the array with 2-space indentation.
   */
  static renderJson(urls: interfaces.ISitemapUrl[]): string {
    return JSON.stringify(urls, null, 2);
  }

  /**
   * Build a single <url> element object for use with smartxml.
   *
   * @param url - The entry to convert.
   * @param options - Supplies `defaultChangeFreq` / `defaultPriority` for
   *   entries that do not set their own.
   * @returns Plain object describing the `<url>` element and its extensions.
   */
  private static buildUrlElement(url: interfaces.ISitemapUrl, options?: interfaces.ISitemapOptions): any {
    const el: any = {
      loc: XmlRenderer.escapeXml(url.loc),
    };

    // lastmod
    if (url.lastmod != null) {
      el.lastmod = XmlRenderer.formatDate(url.lastmod);
    }

    // changefreq (use default if not specified)
    const changefreq = url.changefreq ?? options?.defaultChangeFreq;
    if (changefreq) {
      el.changefreq = changefreq;
    }

    // priority (use default if not specified)
    const priority = url.priority ?? options?.defaultPriority;
    if (priority != null) {
      el.priority = priority.toFixed(1);
    }

    // Image extension
    if (url.images && url.images.length > 0) {
      el['image:image'] = url.images.map((img) => XmlRenderer.buildImageElement(img));
    }

    // Video extension
    if (url.videos && url.videos.length > 0) {
      el['video:video'] = url.videos.map((vid) => XmlRenderer.buildVideoElement(vid));
    }

    // News extension
    if (url.news) {
      el['news:news'] = XmlRenderer.buildNewsElement(url.news);
    }

    // hreflang alternates
    if (url.alternates && url.alternates.length > 0) {
      el['xhtml:link'] = url.alternates.map((alt) => ({
        '@_rel': 'alternate',
        '@_hreflang': alt.hreflang,
        '@_href': XmlRenderer.escapeXml(alt.href),
      }));
    }

    return el;
  }

  /**
   * Build an <image:image> element object.
   *
   * @param img - Image data; only `loc` is always emitted, the other fields
   *   are emitted when truthy.
   */
  private static buildImageElement(img: interfaces.ISitemapImage): any {
    const el: any = {
      'image:loc': XmlRenderer.escapeXml(img.loc),
    };
    if (img.caption) {
      el['image:caption'] = XmlRenderer.escapeXml(img.caption);
    }
    if (img.title) {
      el['image:title'] = XmlRenderer.escapeXml(img.title);
    }
    if (img.geoLocation) {
      el['image:geo_location'] = XmlRenderer.escapeXml(img.geoLocation);
    }
    if (img.licenseUrl) {
      el['image:license'] = XmlRenderer.escapeXml(img.licenseUrl);
    }
    return el;
  }

  /**
   * Build a <video:video> element object.
   *
   * @param vid - Video data; thumbnail, title and description are always
   *   emitted, every other field only when present.
   */
  private static buildVideoElement(vid: interfaces.ISitemapVideo): any {
    const el: any = {
      'video:thumbnail_loc': XmlRenderer.escapeXml(vid.thumbnailLoc),
      'video:title': XmlRenderer.escapeXml(vid.title),
      'video:description': XmlRenderer.escapeXml(vid.description),
    };

    if (vid.contentLoc) {
      el['video:content_loc'] = XmlRenderer.escapeXml(vid.contentLoc);
    }
    if (vid.playerLoc) {
      el['video:player_loc'] = XmlRenderer.escapeXml(vid.playerLoc);
    }
    if (vid.duration != null) {
      el['video:duration'] = vid.duration;
    }
    if (vid.rating != null) {
      el['video:rating'] = vid.rating;
    }
    if (vid.viewCount != null) {
      el['video:view_count'] = vid.viewCount;
    }
    if (vid.publicationDate != null) {
      el['video:publication_date'] = XmlRenderer.formatDate(vid.publicationDate);
    }
    if (vid.familyFriendly != null) {
      el['video:family_friendly'] = vid.familyFriendly ? 'yes' : 'no';
    }
    if (vid.tags && vid.tags.length > 0) {
      // NOTE(review): tags are passed through unescaped, unlike the other
      // free-text fields — confirm whether smartxml escapes text itself
      // before changing this.
      el['video:tag'] = vid.tags;
    }
    if (vid.live != null) {
      el['video:live'] = vid.live ? 'yes' : 'no';
    }
    if (vid.requiresSubscription != null) {
      el['video:requires_subscription'] = vid.requiresSubscription ? 'yes' : 'no';
    }

    return el;
  }

  /**
   * Build a <news:news> element object.
   *
   * @param news - News data; `keywords` may be a string or an array, an
   *   array is joined with `', '`.
   */
  private static buildNewsElement(news: interfaces.ISitemapNews): any {
    const el: any = {
      'news:publication': {
        'news:name': XmlRenderer.escapeXml(news.publication.name),
        'news:language': news.publication.language,
      },
      'news:publication_date': XmlRenderer.formatDate(news.publicationDate),
      'news:title': XmlRenderer.escapeXml(news.title),
    };

    if (news.keywords) {
      const kw = Array.isArray(news.keywords) ? news.keywords.join(', ') : news.keywords;
      el['news:keywords'] = XmlRenderer.escapeXml(kw);
    }

    return el;
  }

  /**
   * Insert an XSL stylesheet processing instruction after the XML declaration.
   *
   * @param xml - Serialised document.
   * @param xslUrl - Stylesheet URL; it is XML-escaped before being placed in
   *   the `href` pseudo-attribute.
   * @returns The document with the instruction on the line after the
   *   declaration. If the exact declaration string is not present, the
   *   document is returned unchanged.
   */
  private static insertXslInstruction(xml: string, xslUrl: string): string {
    const declaration = '<?xml version="1.0" encoding="UTF-8"?>';
    const pi = `<?xml-stylesheet type="text/xsl" href="${XmlRenderer.escapeXml(xslUrl)}"?>`;
    // A replacer function is used instead of a replacement string so that
    // `$&`, `$1`, `$'` etc. occurring in the URL are inserted literally.
    return xml.replace(declaration, () => `${declaration}\n${pi}`);
  }
}
|
||||
61
ts/smartsitemap.classes.yamlimporter.ts
Normal file
61
ts/smartsitemap.classes.yamlimporter.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
|
||||
/**
 * Imports sitemap configuration from YAML format.
 * Supports the enhanced YAML schema with per-frequency URL groups,
 * default settings, and feed imports.
 */
export class YamlImporter {
  /**
   * Parse a YAML config string and return ISitemapUrl entries.
   *
   * URL groups under `urls.<changefreq>` are emitted first, in the fixed
   * frequency order `always` … `never`; each entry gets that frequency and
   * `defaults.priority`. Entries that are not absolute http(s) URLs are
   * joined onto `baseUrl`. Feed imports listed under `feeds` are appended
   * afterwards, one feed at a time in the order given.
   *
   * @param yamlString - YAML document text. An empty document yields `[]`.
   * @returns The collected URL entries.
   */
  static async parseConfig(yamlString: string): Promise<interfaces.ISitemapUrl[]> {
    const parsed = (await plugins.smartyaml.yamlStringToObject(yamlString)) as
      | interfaces.ISitemapYamlConfig
      | null
      | undefined;
    // An empty YAML document parses to a nullish value; treat it as an empty config
    // instead of failing on the first property access.
    const config = parsed ?? ({} as interfaces.ISitemapYamlConfig);
    const urls: interfaces.ISitemapUrl[] = [];
    const baseUrl = config.baseUrl?.replace(/\/$/, '') ?? '';

    // Process URL groups by frequency
    if (config.urls) {
      const frequencies: interfaces.TChangeFreq[] = [
        'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never',
      ];

      for (const freq of frequencies) {
        const urlList = config.urls[freq];
        if (urlList && Array.isArray(urlList)) {
          for (const path of urlList) {
            urls.push({
              loc: YamlImporter.resolveLoc(baseUrl, path),
              changefreq: freq,
              priority: config.defaults?.priority,
            });
          }
        }
      }
    }

    // Process feed imports
    if (config.feeds && Array.isArray(config.feeds)) {
      // Dynamic import to avoid circular deps at module load time
      const { FeedImporter } = await import('./smartsitemap.classes.feedimporter.js');

      for (const feedConfig of config.feeds) {
        if (feedConfig.type === 'news') {
          const newsUrls = await FeedImporter.fromUrlAsNews(
            feedConfig.url,
            feedConfig.publicationName ?? 'Unknown',
            feedConfig.publicationLanguage ?? 'en',
          );
          urls.push(...newsUrls);
        } else {
          const standardUrls = await FeedImporter.fromUrl(feedConfig.url);
          urls.push(...standardUrls);
        }
      }
    }

    return urls;
  }

  /**
   * Turn a configured path into a full `loc`.
   *
   * Only strings that start with `http://` or `https://` are kept as-is; a
   * relative path that merely begins with the letters "http" (for example
   * `http-status/`) is joined onto `baseUrl` like any other path.
   */
  private static resolveLoc(baseUrl: string, path: string): string {
    if (/^https?:\/\//i.test(path)) {
      return path;
    }
    return `${baseUrl}${path.startsWith('/') ? '' : '/'}${path}`;
  }
}
|
||||
@@ -1,11 +1,17 @@
|
||||
// node built-ins
|
||||
import * as zlib from 'zlib';
|
||||
import { promisify } from 'util';
|
||||
import { Readable } from 'stream';
|
||||
|
||||
export { zlib, promisify, Readable };
|
||||
|
||||
// pushrocks scope
|
||||
import * as smartcache from '@push.rocks/smartcache';
|
||||
import * as smartfeed from '@push.rocks/smartfeed';
|
||||
import * as smartxml from '@push.rocks/smartxml';
|
||||
import * as smartyaml from '@push.rocks/smartyaml';
|
||||
import * as webrequest from '@push.rocks/webrequest';
|
||||
|
||||
export { smartcache, smartfeed, smartxml, smartyaml, webrequest };
|
||||
export { smartfeed, smartxml, smartyaml, webrequest };
|
||||
|
||||
// tsclass
|
||||
import * as tsclass from '@tsclass/tsclass';
|
||||
|
||||
Reference in New Issue
Block a user