fix(smartpdf): harden browser lifecycle, port handling, and PDF result metadata
This commit is contained in:
@@ -31,21 +31,21 @@ export class SmartPdf {
|
||||
}
|
||||
|
||||
// INSTANCE
|
||||
private smartserveInstance: plugins.smartserve.SmartServe;
|
||||
serverPort: number;
|
||||
headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser;
|
||||
private smartserveInstance: plugins.smartserve.SmartServe | null = null;
|
||||
serverPort: number = 0;
|
||||
headlessBrowser: plugins.smartpuppeteer.puppeteer.Browser | null = null;
|
||||
externalBrowserBool: boolean = false;
|
||||
private _readyDeferred: plugins.smartpromise.Deferred<void>;
|
||||
private _candidates: { [key: string]: PdfCandidate } = {};
|
||||
private _options: ISmartPdfOptions;
|
||||
private _options: ISmartPdfOptions & { portRangeStart: number; portRangeEnd: number };
|
||||
private _isRunning: boolean = false;
|
||||
|
||||
constructor(optionsArg?: ISmartPdfOptions) {
|
||||
this._readyDeferred = new plugins.smartpromise.Deferred();
|
||||
this._options = {
|
||||
portRangeStart: 20000,
|
||||
portRangeEnd: 30000,
|
||||
...optionsArg
|
||||
...optionsArg,
|
||||
portRangeStart: optionsArg?.portRangeStart ?? 20000,
|
||||
portRangeEnd: optionsArg?.portRangeEnd ?? 30000,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -58,7 +58,8 @@ export class SmartPdf {
|
||||
this._readyDeferred = new plugins.smartpromise.Deferred();
|
||||
|
||||
// lets set the external browser in case one is provided
|
||||
this.headlessBrowser = headlessBrowserArg;
|
||||
this.externalBrowserBool = !!headlessBrowserArg;
|
||||
this.headlessBrowser = headlessBrowserArg ?? null;
|
||||
// setup puppeteer
|
||||
if (this.headlessBrowser) {
|
||||
this.externalBrowserBool = true;
|
||||
@@ -86,11 +87,11 @@ export class SmartPdf {
|
||||
}
|
||||
} else {
|
||||
// Find a free port in the specified range
|
||||
this.serverPort = await smartnetworkInstance.findFreePort(
|
||||
const freePort = await smartnetworkInstance.findFreePort(
|
||||
this._options.portRangeStart,
|
||||
this._options.portRangeEnd
|
||||
);
|
||||
if (!this.serverPort) {
|
||||
if (!freePort) {
|
||||
// Clean up browser if we created one
|
||||
if (!this.externalBrowserBool && this.headlessBrowser) {
|
||||
await this.headlessBrowser.close();
|
||||
@@ -98,6 +99,7 @@ export class SmartPdf {
|
||||
}
|
||||
throw new Error(`No free ports available in range ${this._options.portRangeStart}-${this._options.portRangeEnd}`);
|
||||
}
|
||||
this.serverPort = freePort;
|
||||
}
|
||||
|
||||
// Now setup server using smartserve
|
||||
@@ -152,6 +154,13 @@ export class SmartPdf {
|
||||
this._candidates = {};
|
||||
}
|
||||
|
||||
private getBrowser(): plugins.smartpuppeteer.puppeteer.Browser {
|
||||
if (!this.headlessBrowser) {
|
||||
throw new Error('SmartPdf is not running. Call start() before creating PDFs.');
|
||||
}
|
||||
return this.headlessBrowser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a PDF for a given HTML string.
|
||||
*/
|
||||
@@ -159,9 +168,9 @@ export class SmartPdf {
|
||||
await this._readyDeferred.promise;
|
||||
const pdfCandidate = new PdfCandidate(htmlStringArg);
|
||||
this._candidates[pdfCandidate.pdfId] = pdfCandidate;
|
||||
let page: plugins.smartpuppeteer.puppeteer.Page;
|
||||
let page: plugins.smartpuppeteer.puppeteer.Page | undefined;
|
||||
try {
|
||||
page = await this.headlessBrowser.newPage();
|
||||
page = await this.getBrowser().newPage();
|
||||
await page.setViewport({
|
||||
width: 794,
|
||||
height: 1122,
|
||||
@@ -169,10 +178,13 @@ export class SmartPdf {
|
||||
const response = await page.goto(`http://localhost:${this.serverPort}/${pdfCandidate.pdfId}`, {
|
||||
waitUntil: 'networkidle2',
|
||||
});
|
||||
if (!response) {
|
||||
throw new Error('No response received while rendering PDF candidate.');
|
||||
}
|
||||
const headers = response.headers();
|
||||
if (headers['pdf-id'] !== pdfCandidate.pdfId) {
|
||||
console.log('Error! Headers do not match. For security reasons no pdf is being emitted!');
|
||||
return;
|
||||
throw new Error('PDF candidate security check failed.');
|
||||
} else {
|
||||
console.log(`id security check passed for ${pdfCandidate.pdfId}`);
|
||||
}
|
||||
@@ -208,7 +220,7 @@ export class SmartPdf {
|
||||
}
|
||||
|
||||
async getPdfResultForWebsite(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
const page = await this.getBrowser().newPage();
|
||||
try {
|
||||
await page.setViewport({
|
||||
width: 1980,
|
||||
@@ -247,7 +259,7 @@ export class SmartPdf {
|
||||
}
|
||||
|
||||
async getFullWebsiteAsSinglePdf(websiteUrl: string): Promise<plugins.tsclass.business.IPdf> {
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
const page = await this.getBrowser().newPage();
|
||||
try {
|
||||
await page.setViewport({
|
||||
width: 1920,
|
||||
@@ -320,8 +332,10 @@ export class SmartPdf {
|
||||
return {
|
||||
name: parsedPath.base,
|
||||
buffer,
|
||||
id: null,
|
||||
metadata: null,
|
||||
id: parsedPath.base,
|
||||
metadata: {
|
||||
textExtraction: await this.extractTextFromPdfBuffer(buffer),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -360,7 +374,7 @@ export class SmartPdf {
|
||||
const scale = options.scale || 3.0;
|
||||
|
||||
// Create a new page using the headless browser.
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
const page = await this.getBrowser().newPage();
|
||||
|
||||
try {
|
||||
// Prepare PDF data as a base64 string.
|
||||
@@ -471,7 +485,7 @@ export class SmartPdf {
|
||||
const quality = options.quality || 85;
|
||||
|
||||
// Create a new page using the headless browser
|
||||
const page = await this.headlessBrowser.newPage();
|
||||
const page = await this.getBrowser().newPage();
|
||||
|
||||
try {
|
||||
// Prepare PDF data as a base64 string
|
||||
|
||||
Reference in New Issue
Block a user