正如您在下面的示例代码中看到的那样,我正在 Node 中使用 Puppeteer 和一组工作进程(cluster workers),按给定的 URL 并行处理多个网站截图请求:
const cluster = require('cluster');
const express = require('express');
const bodyParser = require('body-parser');
const puppeteer = require('puppeteer');

/**
 * Navigates `page` to `url`, retrying once in the same page with a longer
 * timeout if the first attempt fails.
 *
 * The retry reuses the page on purpose: resources fetched during the first
 * attempt may already be cached, so the second attempt can finish sooner.
 *
 * @param {object} page - Puppeteer page to navigate.
 * @param {string} url - Absolute URL to load.
 * @returns {Promise<void>} Resolves once one of the two attempts succeeds;
 *   rejects with the second attempt's error otherwise.
 */
async function navigateWithRetry(page, url) {
  try {
    await page.goto(url, { timeout: 60000, waitUntil: 'networkidle2' });
  } catch (firstError) {
    await page.goto(url, { timeout: 120000, waitUntil: 'networkidle2' });
  }
}

/**
 * Launches a dedicated browser, loads `http://<domain>/` and captures a PNG
 * screenshot of it.
 *
 * @param {string} domain - Host name to capture (no scheme, no path).
 * @returns {Promise<string|undefined>} Base64-encoded PNG, or `undefined`
 *   when the page could not be loaded or captured (the failure is logged).
 *   Rejects only if the browser or page could not be created or closed.
 */
async function getScreenshot(domain) {
  const url = `http://${domain}/`;
  const browser = await puppeteer.launch({
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
  });

  try {
    const page = await browser.newPage();
    try {
      await navigateWithRetry(page, url);
      // Capture after whichever attempt succeeded, not only after the retry.
      return await page.screenshot({ type: 'png', encoding: 'base64' });
    } catch (error) {
      console.error(`Connecting to: ${domain} failed due to: ${error}`);
      return undefined;
    }
  } finally {
    // Always release the browser process, even when newPage/screenshot throws.
    // Closing the browser also closes every page it opened.
    await browser.close();
  }
}

/**
 * Tells the master process that this worker has finished handling `domain`.
 * A failure to send is logged and otherwise ignored so that the HTTP
 * response can still be written.
 *
 * @param {string} domain - Domain whose request has just been handled.
 */
function notifyMaster(domain) {
  try {
    process.send({ domain: domain });
  } catch (error) {
    console.error(`Error while exiting worker ${process.pid} due to: ${error}`);
  }
}

if (cluster.isMaster) {
  // Master: keep one worker per CPU core alive and recycle a worker after
  // each request it reports as finished.
  const numOfWorkers = require('os').cpus().length;
  for (let worker = 0; worker < numOfWorkers; worker++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.debug(`Worker ${worker.process.pid} died with code: ${code}, and signal: ${signal}`);
    // Replace the dead worker so the pool size stays constant.
    cluster.fork();
  });

  cluster.on('message', (handler, msg) => {
    console.debug(`Worker: ${handler.process.pid} has finished working on ${msg.domain}. Exiting...`);
    // The worker may already be gone by the time its message is processed.
    if (cluster.workers[handler.id]) {
      cluster.workers[handler.id].kill('SIGTERM');
    }
  });
} else {
  // Worker: serve screenshot requests over HTTP.
  const app = express();
  app.use(bodyParser.json());

  app.listen(80, () => {
    console.debug(`Worker ${process.pid} is listening to incoming messages`);
  });

  app.post('/screenshot', (req, res) => {
    const domain = req.body.domain;

    getScreenshot(domain)
      .then((screenshot) => {
        notifyMaster(domain);
        res.status(200).json({ screenshot: screenshot });
      })
      .catch((error) => {
        notifyMaster(domain);
        // NOTE(review): an Error instance has no enumerable own properties,
        // so this serializes as `{ "error": {} }`; the shape is kept as-is
        // for existing clients.
        res.status(500).json({ error: error });
      });
  });
}
一些解释:
>每次请求到达时,会由一个工作进程处理它,该工作进程在处理完成后会被终止
>每个工作进程都会创建一个只含单个页面的新浏览器实例;如果页面加载超过60秒,就会在同一页面中重试加载(因为部分资源可能已经加载完成),重试的超时时间为120秒
>完成后,页面和浏览器都将关闭
我的问题是,一些合法的域名出现了我无法解释的错误:
Error: Protocol error (Page.navigate): Target closed.
Error: Protocol error (Runtime.callFunctionOn): Session closed. Most likely the page has been closed.
我读过一些 GitHub issue(现在找不到了),其中提到当页面发生重定向并在域名开头添加 'www' 时会出现这种情况,但我希望事实并非如此……
我是否遗漏了什么?
您可以看看 puppeteer-cluster 这个库,它会处理这类错误情况,并允许您对 URL 进行重试。它可以管理一组浏览器实例,还能简化您的代码。(免责声明:我是该库的作者)