diff --git a/lib/browser.js b/lib/browser.js index 4e4e0f01..5a8a7d3a 100644 --- a/lib/browser.js +++ b/lib/browser.js @@ -29,9 +29,10 @@ import ExportError from './errors/ExportError.js'; // Get the template for the page const template = readFileSync(__dirname + '/templates/template.html', 'utf8'); +// To save the browser let browser; -// Save the WebSocket endpoint in case of a sudden disconnect +// To save the WebSocket endpoint in case of a sudden disconnect let wsEndpoint; /** @@ -41,20 +42,15 @@ let wsEndpoint; * browser and relaunch a new instance. */ async function reconnect() { - log(3, `[browser] Browser disconnected, attempting to reconnect.`); try { // Start the reconnecting log(3, `[browser] Restarting the browser connection.`); - // If not able to do so, try to reconnect the browser - const connectOptions = { - browserWSEndpoint: wsEndpoint - }; - wsEndpoint = null; - // Try to reconnect the browser if (browser && !browser.connected) { - browser = await puppeteer.connect(connectOptions); + browser = await puppeteer.connect({ + browserWSEndpoint: wsEndpoint + }); } // Save a new WebSocket endpoint @@ -74,7 +70,7 @@ async function reconnect() { // Try to close the browser before relaunching try { - await browser.close(); + await close(); } catch (error) { logWithStack( 1, @@ -82,7 +78,6 @@ async function reconnect() { '[browser] Could not close the browser before relaunch.' ); } - browser = null; // Try to relaunch the browser await create(getOptions().puppeteer.args || []); @@ -218,9 +213,10 @@ export async function create(puppeteerArgs) { */ export async function close() { // Close the browser when connected - if (browser?.connected) { + if (browser && browser.connected) { await browser.close(); } + browser = null; log(4, '[browser] Closed the browser.'); } @@ -232,50 +228,51 @@ export async function close() { * The function creates a new page, disables caching, sets content using * setPageContent(), and returns the created Puppeteer Page. * - * @param {Object} resource - The pool resource that contians page and id. + * @param {Object} poolResource - The pool resource that contians page and id. * * @returns {(boolean|object)} Returns false if the browser instance is not * available, or a Puppeteer Page object representing the newly created page. */ -export async function newPage(resource) { +export async function newPage(poolResource) { const startDate = new Date().getTime(); - if (!browser) { - return false; + // Throw an error in case of no connected browser + if (!browser || !browser.connected) { + throw new ExportError(`[browser] Browser is not yet connected.`, 400); } // Create a page - resource.page = await browser.newPage(); + poolResource.page = await browser.newPage(); // Disable cache - await resource.page.setCacheEnabled(false); + await poolResource.page.setCacheEnabled(false); // Set the content - await setPageContent(resource.page); + await setPageContent(poolResource.page); // Set page events - setPageEvents(resource); + setPageEvents(poolResource); // Check if the page is correctly created - if (!resource.page || resource.page.isClosed()) { + if (!poolResource.page || poolResource.page.isClosed()) { throw new ExportError('The page is invalid or closed.', 500); } log( 3, - `[pool] Successfully created a worker ${resource.id} - took ${ + `[pool] Successfully created a worker ${poolResource.id} - took ${ new Date().getTime() - startDate } ms.` ); // Return the resource with a ready to use page - return resource; + return poolResource; } /** * Clears the content of a Puppeteer Page based on the specified mode. * - * @param {Object} resource - The pool resource that contians page and id. + * @param {Object} poolResource - The pool resource that contians page and id. * @param {boolean} hardReset - A flag indicating the type of clearing * to be performed. If true, navigates to 'about:blank' and resets content * and scripts. If false, clears the body content by setting a predefined HTML @@ -283,20 +280,20 @@ export async function newPage(resource) { * * @throws {Error} Logs thrown error if clearing the page content fails. */ -export async function clearPage(resource, hardReset = false) { +export async function clearPage(poolResource, hardReset = false) { try { - if (!resource.page.isClosed()) { + if (!poolResource.page.isClosed()) { if (hardReset) { // Navigate to about:blank - await resource.page.goto('about:blank', { + await poolResource.page.goto('about:blank', { waitUntil: 'domcontentloaded' }); // Set the content and and scripts again - await setPageContent(resource.page); + await setPageContent(poolResource.page); } else { // Clear body content - await resource.page.evaluate(() => { + await poolResource.page.evaluate(() => { document.body.innerHTML = '
'; }); @@ -306,7 +303,7 @@ export async function clearPage(resource, hardReset = false) { logWithStack( 2, error, - `[browser] Worker with ID ${resource.id}: could not clear the content of the page.` + `[browser] Worker with ID ${poolResource.id}: could not clear the content of the page.` ); } } @@ -491,17 +488,17 @@ async function setPageContent(page) { /** * Set events for a Puppeteer Page. * - * @param {Object} resource - The pool resource that contians page and id. + * @param {Object} poolResource - The pool resource that contians page and id. */ -function setPageEvents(resource) { +function setPageEvents(poolResource) { // Get debug options - const { debug } = getOptions(); + const { debug, pool } = getOptions(); // Set the pageerror listener - resource.page.on('pageerror', async (error) => { + poolResource.page.on('pageerror', async (error) => { // TODO: Consider adding a switch here that turns on log(0) logging // on page errors. - await resource.page.$eval( + await poolResource.page.$eval( '#container', (element, errorMessage) => { // eslint-disable-next-line no-undef @@ -515,47 +512,50 @@ function setPageEvents(resource) { // Set the console listener, if needed if (debug.enable && debug.listenToConsole) { - resource.page.on('console', (message) => { + poolResource.page.on('console', (message) => { console.log(`[debug] ${message.text()}`); }); } // Add the framedetached event if the connection is over WebSocket if (envs.OTHER_CONNECTION_OVER_PIPE === false) { - resource.page.on('framedetached', async (frame) => { + poolResource.page.on('framedetached', async (frame) => { // Get the main frame - const mainFrame = resource.page.mainFrame(); - - // Check if a page's frame is detached and doesn't require to be recreated - if (frame === mainFrame && mainFrame.detached) { + const mainFrame = poolResource.page.mainFrame(); + + // Check if a page's frame is detached and requires to be recreated + if ( + frame === mainFrame && + mainFrame.detached && + poolResource.workCount <= pool.workLimit + ) { log(3, `[browser] The page's frame detached.`); try { - /// - // expBackoff( - // async (resource) => { - // try { - // await resource.page.close(); - // } catch (error) { - // logWithStack( - // 1, - // error, - // '[browser] Could not close the page with a detached frame.' - // ); - // } - // await newPage(resource); - // }, - // 0, - // resource - // ); - /// // Try to connect to a new page using exponential backoff strategy - expBackoff(newPage, 0, resource); - } catch (error) { - log( - 3, - '[browser] Could not create a new page. Shutting server down.' + expBackoff( + async (poolResourceId, poolResource) => { + try { + // Try to close the page with a detached frame + if (!poolResource.page.isClosed()) { + await poolResource.page.close(); + } + } catch (error) { + log( + 3, + `[browser] Could not close the page with a detached frame. Page already closed, id: ${poolResourceId}.` + ); + } + + // Trigger a page creation + await newPage(poolResource); + }, + 0, + poolResource.id, + poolResource ); - throw error; + } catch (error) { + log(3, '[browser] Could not create a new page.'); + poolResource.page = null; } } }); diff --git a/lib/pool.js b/lib/pool.js index 75547c8f..b31058ff 100644 --- a/lib/pool.js +++ b/lib/pool.js @@ -54,13 +54,13 @@ const factory = { */ create: async () => { try { - const pageResource = { + const poolResource = { id: uuid(), // Try to distribute the initial work count workCount: Math.round(Math.random() * (poolConfig.workLimit / 2)) }; - return await newPage(pageResource); + return await newPage(poolResource); } catch (error) { throw new ExportError( 'Error encountered when creating a new page.', @@ -114,7 +114,7 @@ const factory = { validated = false; } - /// + /// TO DO: Test the explicit creation of a resource // // Check if a resource needs to be created explicitly // if ( // !validated && @@ -141,6 +141,11 @@ const factory = { if (workerHandle.page) { try { + // Remove all attached event listeners from the resource + workerHandle.page.removeAllListeners('pageerror'); + workerHandle.page.removeAllListeners('console'); + workerHandle.page.removeAllListeners('framedetached'); + // We need to wait around for this await workerHandle.page.close(); } catch (error) { @@ -207,6 +212,7 @@ export const initPool = async (config) => { pool.on('destroySuccess', (_eventId, resource) => { log(4, `[pool] Destroyed a worker with ID ${resource.id}.`); + resource.page = null; }); const initialResources = []; @@ -254,6 +260,11 @@ export async function killPool() { pool.release(worker.resource); } + // Remove all attached event listeners from the pool + pool.removeAllListeners('release'); + pool.removeAllListeners('destroySuccess'); + pool.removeAllListeners('destroyFail'); + // Destroy the pool if it is still available if (!pool.destroyed) { await pool.destroy(); @@ -324,8 +335,8 @@ export const postWork = async (chart, options) => { log(4, '[pool] Acquired a worker handle.'); if (!workerHandle.page) { - pool.release(workerHandle); - workerHandle = null; + // Release the resource with exceeded `workLimit` in order to recreate + workerHandle.workCount = poolConfig.workLimit + 1; throw new ExportError( 'Resolved worker page is invalid: the pool setup is wonky.', 500 @@ -343,12 +354,9 @@ export const postWork = async (chart, options) => { // Check if it's an error if (result instanceof Error) { - // TODO: If the export failed because puppeteer timed out, we need to force kill the worker so we get a new page. That needs to be handled better than this hack. if (result.message === 'Rasterization timeout') { // Release the resource with exceeded `workLimit` in order to recreate workerHandle.workCount = poolConfig.workLimit + 1; - pool.release(workerHandle); - workerHandle = null; } throw new ExportError( diff --git a/lib/resource_release.js b/lib/resource_release.js index 1d8e85ce..10d47428 100644 --- a/lib/resource_release.js +++ b/lib/resource_release.js @@ -15,6 +15,7 @@ See LICENSE file in root for details. import { clearAllIntervals } from './intervals.js'; import { killPool } from './pool.js'; import { closeServers } from './server/server.js'; +import { get as getBrowser } from './browser.js'; /** * Clean up function to trigger before ending process for the graceful shutdown. @@ -22,6 +23,9 @@ import { closeServers } from './server/server.js'; * @param {number} exitCode - An exit code for the process.exit() function. */ export const shutdownCleanUp = async (exitCode) => { + // Remove all attached event listeners from the browser + getBrowser().removeAllListeners('disconnected'); + // Await freeing all resources await Promise.allSettled([ // Clear all ongoing intervals