const crawler = new PuppeteerCrawler({
    proxyConfiguration,
    requestHandler: router,
    maxRequestRetries: 25,
    requestList: await RequestList.open(null, [initUrl]),
    requestHandlerTimeoutSecs: 2000,
    maxConcurrency: 1,
}, config);
I use the default RequestQueue to add the productUrls, and they are handled inside the defaultRequestHandler. When some of them fail, I purposely throw an Error, expecting the failed request (which is the initUrl) to go back to the RequestList, but it also goes to the default RequestQueue, which is not what I want.
My preNavigationHooks look like this:

preNavigationHooks: [
    async (crawlingContext, gotoOptions) => {
        const { page, request, log } = crawlingContext;
        gotoOptions.waitUntil = 'load';
        if (isProductUrl) {
            page.on('response', async (response) => {
                if (response.request().url().includes('productdetail')) {
                    try {
                        const data = await response.json();
                        await Actor.pushData(data);
                        await defaultQueue.markRequestHandled(request);
                        page.removeAllListeners('response');
                        await page.close();
                    } catch (err) {
                        log.error(err);
                    }
                }
            });
        }
    },
]
I get this warning:

WARN PuppeteerCrawler: Reclaiming failed request back to the list or queue. Navigation failed because browser has disconnected!

If I remove the await page.close(); line, I get this error instead:

WARN PuppeteerCrawler: Reclaiming failed request back to the list or queue. requestHandler timed out after 130 seconds (o4wrbxkzgU1eP2n).
Here is my default handler:

router.addDefaultHandler(async ({ request }) => {
    if (searchPageUrlPattern.test(request.url)) {
        // Enqueue links...
    }
});
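Expanded with the details I described above, the handler roughly does this (a simplified sketch; the selector and the failure condition are placeholders, not the exact code):

router.addDefaultHandler(async ({ request, page, crawler }) => {
    if (searchPageUrlPattern.test(request.url)) {
        // Collect product detail links from the initUrl page (placeholder selector).
        const productUrls = await page.$$eval('a[href*="productdetail"]', (links) => links.map((a) => a.href));
        // These are added to the default RequestQueue.
        await crawler.addRequests(productUrls);
        // Thrown on purpose when the page looks broken, expecting the request
        // to be reclaimed by the RequestList it came from.
        if (productUrls.length === 0) {
            throw new Error(`No product links found on ${request.url}`);
        }
    }
});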