BREAKING CHANGE(api): redesign smartsitemap around builder-based sitemap creation, parsing, validation, and import utilities
This commit is contained in:
159
ts/smartsitemap.classes.feedimporter.ts
Normal file
159
ts/smartsitemap.classes.feedimporter.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
import * as plugins from './smartsitemap.plugins.js';
|
||||
import type * as interfaces from './interfaces/index.js';
|
||||
|
||||
/**
 * Imports RSS/Atom feeds and converts them to sitemap URL entries.
 *
 * Every entry point parses the feed with `plugins.smartfeed.Smartfeed`, applies the
 * optional `newerThan` / `limit` filters from the import options, and then maps each
 * remaining item either through the caller-supplied `options.mapItem` or through a
 * built-in mapping (`link` -> `loc`, `isoDate` -> `lastmod`).
 */
export class FeedImporter {
  /**
   * Import from a feed URL, returning standard sitemap URL entries.
   *
   * @param feedUrl - Location of the feed, handed to `Smartfeed.parseFeedFromUrl`.
   * @param options - Optional date/limit filters and custom item mapper.
   * @returns One sitemap URL entry per feed item that survives filtering and mapping.
   */
  static async fromUrl(
    feedUrl: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromUrl(feedUrl);
    return FeedImporter.mapItems(feed.items, options);
  }

  /**
   * Import from a feed XML string, returning standard sitemap URL entries.
   *
   * @param feedXml - Raw feed document, handed to `Smartfeed.parseFeedFromString`.
   * @param options - Optional date/limit filters and custom item mapper.
   * @returns One sitemap URL entry per feed item that survives filtering and mapping.
   */
  static async fromString(
    feedXml: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromString(feedXml);
    return FeedImporter.mapItems(feed.items, options);
  }

  /**
   * Import from a feed URL, returning news sitemap URL entries.
   *
   * @param feedUrl - Location of the feed, handed to `Smartfeed.parseFeedFromUrl`.
   * @param publicationName - Publication name written into every news entry.
   * @param publicationLanguage - Publication language; defaults to `'en'` when omitted.
   * @param options - Optional date/limit filters and custom item mapper. When
   *   `options.mapItem` is set it fully replaces the built-in news mapping.
   * @returns One sitemap URL entry per feed item that survives filtering and mapping.
   */
  static async fromUrlAsNews(
    feedUrl: string,
    publicationName: string,
    publicationLanguage?: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromUrl(feedUrl);
    return FeedImporter.mapItemsAsNews(
      feed.items,
      publicationName,
      publicationLanguage ?? 'en',
      options,
    );
  }

  /**
   * Import from a feed XML string, returning news sitemap URL entries.
   *
   * @param feedXml - Raw feed document, handed to `Smartfeed.parseFeedFromString`.
   * @param publicationName - Publication name written into every news entry.
   * @param publicationLanguage - Publication language; defaults to `'en'` when omitted.
   * @param options - Optional date/limit filters and custom item mapper. When
   *   `options.mapItem` is set it fully replaces the built-in news mapping.
   * @returns One sitemap URL entry per feed item that survives filtering and mapping.
   */
  static async fromStringAsNews(
    feedXml: string,
    publicationName: string,
    publicationLanguage?: string,
    options?: interfaces.IFeedImportOptions,
  ): Promise<interfaces.ISitemapUrl[]> {
    const smartfeed = new plugins.smartfeed.Smartfeed();
    const feed = await smartfeed.parseFeedFromString(feedXml);
    return FeedImporter.mapItemsAsNews(
      feed.items,
      publicationName,
      publicationLanguage ?? 'en',
      options,
    );
  }

  /**
   * Map parsed feed items to standard sitemap URLs.
   *
   * Built-in mapping: `item.link` becomes `loc`; `item.isoDate`, when present,
   * becomes `lastmod`. Items without a link are skipped.
   */
  private static mapItems(
    items: any[],
    options?: interfaces.IFeedImportOptions,
  ): interfaces.ISitemapUrl[] {
    return FeedImporter.mapFilteredItems(items, options, (item: any) => {
      const url: interfaces.ISitemapUrl = {
        loc: item.link,
      };
      if (item.isoDate) {
        url.lastmod = item.isoDate;
      }
      return url;
    });
  }

  /**
   * Map parsed feed items to news sitemap URLs.
   *
   * Built-in mapping: in addition to `loc` / `lastmod`, each entry carries a `news`
   * object with the given publication, the item's title (empty string when missing),
   * its categories as keywords, and its date. Items without a date get the current
   * time as publication date. Items without a link are skipped.
   */
  private static mapItemsAsNews(
    items: any[],
    publicationName: string,
    publicationLanguage: string,
    options?: interfaces.IFeedImportOptions,
  ): interfaces.ISitemapUrl[] {
    return FeedImporter.mapFilteredItems(items, options, (item: any) => {
      const url: interfaces.ISitemapUrl = {
        loc: item.link,
        news: {
          publication: {
            name: publicationName,
            language: publicationLanguage,
          },
          publicationDate: item.isoDate || new Date().toISOString(),
          title: item.title || '',
          keywords: item.categories,
        },
      };
      if (item.isoDate) {
        url.lastmod = item.isoDate;
      }
      return url;
    });
  }

  /**
   * Shared pipeline behind `mapItems` and `mapItemsAsNews`: filter, then map.
   *
   * When `options.mapItem` is provided it receives every filtered item (including
   * items without a link) and its falsy results are dropped; the built-in mapper is
   * not used at all in that case. Otherwise items without a link are skipped and the
   * rest go through `defaultMapper`.
   */
  private static mapFilteredItems(
    items: any[],
    options: interfaces.IFeedImportOptions | undefined,
    defaultMapper: (item: any) => interfaces.ISitemapUrl,
  ): interfaces.ISitemapUrl[] {
    const filtered = FeedImporter.filterItems(items, options);

    if (options?.mapItem) {
      const results: interfaces.ISitemapUrl[] = [];
      for (const item of filtered) {
        // Invoked as a method of `options` so a mapper relying on `this` keeps working.
        const mapped = options.mapItem(item as interfaces.IFeedItem);
        if (mapped) results.push(mapped);
      }
      return results;
    }

    return filtered
      .filter((item: any) => item.link)
      .map((item: any) => defaultMapper(item));
  }

  /**
   * Apply date and limit filters to feed items.
   *
   * - `newerThan` (a `Date` or a millisecond timestamp) keeps items dated at or after
   *   the threshold. Items with no date, or with a date that cannot be parsed, are
   *   kept because their age is unknown.
   * - `limit`, when greater than zero, keeps only the first `limit` items. It is
   *   applied here, i.e. before items without a link are dropped and before a custom
   *   mapper may reject items, so the final result can be shorter than `limit`.
   *
   * The input array is never mutated; a missing `items` value is treated as empty.
   */
  private static filterItems(items: any[], options?: interfaces.IFeedImportOptions): any[] {
    // Defensive copy; tolerate a parsed feed that carries no `items` array.
    let result = [...(items ?? [])];

    // Filter by date
    if (options?.newerThan != null) {
      const threshold = options.newerThan instanceof Date
        ? options.newerThan.getTime()
        : options.newerThan;

      result = result.filter((item: any) => {
        if (!item.isoDate) return true; // keep items without dates
        const timestamp = new Date(item.isoDate).getTime();
        // An unparseable date yields NaN, and `NaN >= threshold` is always false,
        // which would silently drop the item. Treat it like a missing date instead.
        return Number.isNaN(timestamp) || timestamp >= threshold;
      });
    }

    // Apply limit
    if (options?.limit != null && options.limit > 0) {
      result = result.slice(0, options.limit);
    }

    return result;
  }
}
|
||||
Reference in New Issue
Block a user