BREAKING CHANGE(api): redesign smartsitemap around builder-based sitemap creation, parsing, validation, and import utilities
This commit is contained in:
289
ts/smartsitemap.classes.validator.ts
Normal file
289
ts/smartsitemap.classes.validator.ts
Normal file
@@ -0,0 +1,289 @@
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
|
||||
/** `changefreq` values accepted by the validator. Read-only: the list is shared by every validation call. */
const VALID_CHANGEFREQS: readonly interfaces.TChangeFreq[] = [
  'always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never',
];

/** Maximum length of a URL `loc`, in characters. */
const MAX_URL_LENGTH = 2048;

/** Maximum number of URL entries allowed in a single sitemap. */
const MAX_URLS_PER_SITEMAP = 50000;

/** Maximum size of a single sitemap, in bytes. */
const MAX_SITEMAP_SIZE_BYTES = 52_428_800; // 50 MB

/** Maximum number of image entries attached to one URL. */
const MAX_IMAGES_PER_URL = 1000;

/** Maximum number of tags on one video entry. */
const MAX_VIDEO_TAGS = 32;

/** Maximum video duration, in seconds. */
const MAX_VIDEO_DURATION = 28800;

/** Maximum length of a video description, in characters. */
const MAX_VIDEO_DESCRIPTION_LENGTH = 2048;
|
||||
|
||||
/**
 * Validates sitemap URLs and fields against the sitemap protocol specification.
 *
 * All members are static; the class holds no state.
 */
export class SitemapValidator {
  /**
   * Validate a single URL entry.
   *
   * Checks `loc`, `priority`, `changefreq`, `lastmod` and the optional
   * image, video, news and alternate-link extensions.
   *
   * @param url - The URL entry to check.
   * @returns Every problem found, in field order; an empty array when the entry is valid.
   */
  static validateUrl(url: interfaces.ISitemapUrl): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];

    // loc is required
    if (!url.loc) {
      errors.push({ field: 'loc', message: 'URL loc is required', url: url.loc });
    } else {
      errors.push(...SitemapValidator.validateUrlString(url.loc));
    }

    // priority range — written as a negated "in range" test so NaN is rejected too
    if (url.priority != null && !(url.priority >= 0 && url.priority <= 1)) {
      errors.push({
        field: 'priority',
        message: 'Priority must be between 0.0 and 1.0',
        url: url.loc,
      });
    }

    // changefreq
    if (url.changefreq && !VALID_CHANGEFREQS.includes(url.changefreq)) {
      errors.push({
        field: 'changefreq',
        message: `Invalid changefreq "${url.changefreq}". Must be one of: ${VALID_CHANGEFREQS.join(', ')}`,
        url: url.loc,
      });
    }

    // lastmod date validation: anything the Date constructor cannot parse is rejected
    if (url.lastmod != null) {
      const date = url.lastmod instanceof Date ? url.lastmod : new Date(url.lastmod as any);
      if (isNaN(date.getTime())) {
        errors.push({
          field: 'lastmod',
          message: `Invalid lastmod date: "${url.lastmod}"`,
          url: url.loc,
        });
      }
    }

    // Images
    if (url.images) {
      if (url.images.length > MAX_IMAGES_PER_URL) {
        errors.push({
          field: 'images',
          message: `Maximum ${MAX_IMAGES_PER_URL} images per URL, got ${url.images.length}`,
          url: url.loc,
        });
      }
      for (const img of url.images) {
        if (!img.loc) {
          errors.push({ field: 'image:loc', message: 'Image loc is required', url: url.loc });
        }
      }
    }

    // Videos
    if (url.videos) {
      for (const vid of url.videos) {
        if (!vid.thumbnailLoc) {
          errors.push({ field: 'video:thumbnail_loc', message: 'Video thumbnail_loc is required', url: url.loc });
        }
        if (!vid.title) {
          errors.push({ field: 'video:title', message: 'Video title is required', url: url.loc });
        }
        if (!vid.description) {
          errors.push({ field: 'video:description', message: 'Video description is required', url: url.loc });
        }
        if (vid.description && vid.description.length > MAX_VIDEO_DESCRIPTION_LENGTH) {
          errors.push({
            field: 'video:description',
            message: `Video description exceeds ${MAX_VIDEO_DESCRIPTION_LENGTH} chars`,
            url: url.loc,
          });
        }
        if (!vid.contentLoc && !vid.playerLoc) {
          errors.push({
            field: 'video:content_loc',
            message: 'Video must have at least one of contentLoc or playerLoc',
            url: url.loc,
          });
        }
        // Negated "in range" tests so NaN durations/ratings are reported as well.
        if (vid.duration != null && !(vid.duration >= 1 && vid.duration <= MAX_VIDEO_DURATION)) {
          errors.push({
            field: 'video:duration',
            message: `Video duration must be 1–${MAX_VIDEO_DURATION} seconds`,
            url: url.loc,
          });
        }
        if (vid.rating != null && !(vid.rating >= 0 && vid.rating <= 5)) {
          errors.push({
            field: 'video:rating',
            message: 'Video rating must be 0.0–5.0',
            url: url.loc,
          });
        }
        if (vid.tags && vid.tags.length > MAX_VIDEO_TAGS) {
          errors.push({
            field: 'video:tag',
            message: `Maximum ${MAX_VIDEO_TAGS} video tags, got ${vid.tags.length}`,
            url: url.loc,
          });
        }
      }
    }

    // News
    if (url.news) {
      if (!url.news.publication?.name) {
        errors.push({ field: 'news:publication:name', message: 'News publication name is required', url: url.loc });
      }
      if (!url.news.publication?.language) {
        errors.push({ field: 'news:publication:language', message: 'News publication language is required', url: url.loc });
      }
      if (!url.news.title) {
        errors.push({ field: 'news:title', message: 'News title is required', url: url.loc });
      }
      if (url.news.publicationDate == null) {
        errors.push({ field: 'news:publication_date', message: 'News publication date is required', url: url.loc });
      }
    }

    // Alternates
    if (url.alternates) {
      for (const alt of url.alternates) {
        if (!alt.hreflang) {
          errors.push({ field: 'xhtml:link:hreflang', message: 'Alternate hreflang is required', url: url.loc });
        }
        if (!alt.href) {
          errors.push({ field: 'xhtml:link:href', message: 'Alternate href is required', url: url.loc });
        }
      }
    }

    return errors;
  }

  /**
   * Validate an entire URL array.
   *
   * Per-entry problems are reported as errors. Duplicate `loc` values and an
   * over-long URL list are reported as warnings and do not affect `valid`.
   *
   * @param urls - The URL entries to check.
   * @param options - Optional settings; only `maxUrlsPerSitemap` is read here.
   * @returns Errors, warnings, computed statistics and an overall `valid` flag
   *   (true when there are no errors).
   */
  static validateUrlset(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): interfaces.IValidationResult {
    const errors: interfaces.IValidationError[] = [];
    const warnings: interfaces.IValidationWarning[] = [];

    for (const url of urls) {
      errors.push(...SitemapValidator.validateUrl(url));
    }

    // Check for duplicates. Entries without a loc are skipped: they already
    // produced a "loc is required" error and are not meaningful duplicates.
    const locs = new Set<string>();
    for (const url of urls) {
      if (!url.loc) continue;
      if (locs.has(url.loc)) {
        warnings.push({
          field: 'loc',
          message: `Duplicate URL: "${url.loc}"`,
          url: url.loc,
        });
      }
      locs.add(url.loc);
    }

    const maxUrls = SitemapValidator.resolveMaxUrls(options);

    // Size limit warnings
    if (urls.length > maxUrls) {
      warnings.push({
        field: 'urlset',
        message: `URL count (${urls.length}) exceeds maximum of ${maxUrls} per sitemap. Use toSitemapSet() for auto-splitting.`,
      });
    }

    const stats = SitemapValidator.computeStats(urls, options);

    return {
      valid: errors.length === 0,
      errors,
      warnings,
      stats,
    };
  }

  /**
   * Validate a URL string for proper format.
   *
   * @param url - The URL to check.
   * @returns An error when the string is longer than the maximum URL length and
   *   an error when the `URL` constructor cannot parse it; empty when both pass.
   */
  static validateUrlString(url: string): interfaces.IValidationError[] {
    const errors: interfaces.IValidationError[] = [];

    if (url.length > MAX_URL_LENGTH) {
      errors.push({
        field: 'loc',
        message: `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`,
        url,
      });
    }

    try {
      // Constructed only for its parse side effect: it throws on malformed input.
      new URL(url);
    } catch {
      errors.push({
        field: 'loc',
        message: `Invalid URL: "${url}"`,
        url,
      });
    }

    return errors;
  }

  /**
   * Compute statistics for a set of URLs.
   *
   * @param urls - The URL entries to summarise.
   * @param options - Optional settings; only `maxUrlsPerSitemap` is read here.
   * @returns Entry counts, a rough size estimate in bytes, and whether the URL
   *   count exceeds the effective per-sitemap limit (`needsIndex`).
   */
  static computeStats(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): interfaces.ISitemapStats {
    let imageCount = 0;
    let videoCount = 0;
    let newsCount = 0;
    let alternateCount = 0;

    for (const url of urls) {
      if (url.images) imageCount += url.images.length;
      if (url.videos) videoCount += url.videos.length;
      if (url.news) newsCount++;
      if (url.alternates) alternateCount += url.alternates.length;
    }

    // Rough estimate: ~200 bytes per basic URL entry, more for extensions
    const estimatedSizeBytes =
      200 + // XML header + urlset tags
      urls.length * 200 + // base URL entries
      imageCount * 150 +
      videoCount * 400 +
      newsCount * 300 +
      alternateCount * 100;

    const maxUrls = SitemapValidator.resolveMaxUrls(options);

    return {
      urlCount: urls.length,
      imageCount,
      videoCount,
      newsCount,
      alternateCount,
      estimatedSizeBytes,
      needsIndex: urls.length > maxUrls,
    };
  }

  /**
   * Check size limits for a URL set.
   *
   * @param urls - The URL entries to measure.
   * @param options - Optional settings; only `maxUrlsPerSitemap` is read here.
   * @returns The URL count and estimated size next to their limits, plus
   *   `withinLimits`, which is true only when both are within bounds.
   */
  static checkSizeLimits(urls: interfaces.ISitemapUrl[], options?: interfaces.ISitemapOptions): {
    withinLimits: boolean;
    urlCount: number;
    maxUrls: number;
    estimatedSizeBytes: number;
    maxSizeBytes: number;
  } {
    const maxUrls = SitemapValidator.resolveMaxUrls(options);
    const stats = SitemapValidator.computeStats(urls, options);

    return {
      withinLimits: urls.length <= maxUrls && stats.estimatedSizeBytes <= MAX_SITEMAP_SIZE_BYTES,
      urlCount: urls.length,
      maxUrls,
      estimatedSizeBytes: stats.estimatedSizeBytes,
      maxSizeBytes: MAX_SITEMAP_SIZE_BYTES,
    };
  }

  /**
   * Resolve the effective per-sitemap URL limit: the caller's
   * `maxUrlsPerSitemap` when given, never above MAX_URLS_PER_SITEMAP.
   * Shared by every limit check so they cannot disagree.
   */
  private static resolveMaxUrls(options?: interfaces.ISitemapOptions): number {
    return Math.min(options?.maxUrlsPerSitemap ?? MAX_URLS_PER_SITEMAP, MAX_URLS_PER_SITEMAP);
  }
}
|
||||
Reference in New Issue
Block a user