@Pepa J Not sure if this reproduces it, but in my case it led to the described result:
```ts
import { HttpCrawler, log } from 'crawlee';

// options, proxyConfiguration, config, and urls come from the surrounding setup.
const crawler = new HttpCrawler(
    {
        maxConcurrency: 2,
        maxRequestsPerMinute: 180,
        ...options,
        proxyConfiguration,
        useSessionPool: true,
        persistCookiesPerSession: true,
        retryOnBlocked: true,
        additionalMimeTypes: ["text/plain", "application/pdf"],
        async requestHandler({ pushData, request, response }) {
            await pushData({
                url: request.url,
                statusCode: response.statusCode,
            });
        },
        async failedRequestHandler({ pushData, request, response }) {
            log.error(`Request for URL "${request.url}" failed.`);
            await pushData({
                url: request.url,
                statusCode: response?.statusCode ?? 0,
            });
        },
        async errorHandler({ request }, { message }) {
            log.error(`Request failed with ${message}`);
            if (!request.noRetry) {
                // Exponential backoff with +/-50% jitter before the next retry.
                const baseWaitTime = Math.pow(2, request.retryCount) * 1000;
                const jitter = baseWaitTime * (Math.random() - 0.5);
                const waitTime = baseWaitTime + jitter;
                await new Promise((resolve) => setTimeout(resolve, waitTime));
            }
        },
    },
    config,
);

await crawler.run(urls);
```
Nothing really special. I have two proxies in my configuration: one in tier 1 and the second in tier 2.
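For completeness, a minimal sketch of what that two-tier setup looks like, assuming the tiers are defined via `tieredProxyUrls` (the proxy URLs below are placeholders, not my real proxies):

```ts
import { ProxyConfiguration } from 'crawlee';

// Two tiers with one proxy each. Crawlee starts requests on tier 1 and
// escalates to tier 2 when requests through the lower tier keep failing.
const proxyConfiguration = new ProxyConfiguration({
    tieredProxyUrls: [
        ['http://tier1-proxy.example.com:8000'], // tier 1, tried first
        ['http://tier2-proxy.example.com:8000'], // tier 2, escalation
    ],
});
```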