`CheerioCrawler` is not able to persist cookies that are set in the session. I have `persistCookiesPerSession: true` set, and I have also verified that the cookie is being saved in the session inside the `requestHandler`. But when I print out the request headers, the `cookie` header is not present. The `session` in `preNavigationHooks` also does not contain the cookies:
/**
 * Crawler that waits out an "access queue" page: when a response carries a
 * `refresh` header, the handler sleeps for roughly that many seconds and then
 * re-enqueues the same URL so it is fetched again with the session's cookies.
 */
const crawler = new CheerioCrawler({
    minConcurrency: 1,
    maxConcurrency: 10,
    // The handler below sleeps for (refresh - 1) seconds, so a `refresh` value
    // larger than about 30 will run into this timeout.
    requestHandlerTimeoutSecs: 30,
    maxRequestRetries: 10,
    useSessionPool: true,
    persistCookiesPerSession: true,
    // NOTE(review): per the Crawlee SessionPool docs, the pool creates a new
    // session for each request while it still has free capacity, so the
    // re-enqueued request would otherwise run in a fresh session without the
    // cookies stored by the first one. Limiting the pool to a single session
    // keeps both requests on the same cookie jar. Confirm this trade-off
    // (no session rotation) is acceptable for the target site.
    sessionPoolOptions: { maxPoolSize: 1 },
    preNavigationHooks: [
        async ({ request, session }, gotOptions) => {
            gotOptions.useHeaderGenerator = true;
            gotOptions.headerGeneratorOptions = {
                browsers: [{ name: 'firefox', minVersion: 115, maxVersion: 115 }],
                devices: ['desktop'],
                operatingSystems: ['windows'],
                locales: ['en-US', 'en'],
            };

            // Debug output: the cookies stored in this request's session and
            // the headers that are about to be sent.
            console.log('START PRE HOOK');
            console.log(request.url);
            console.log(session?.getCookies(request.url));
            console.log(gotOptions.headers);
            console.log('END PRE HOOK');
        },
    ],
    requestHandler: async ({ response, request, session, log, addRequests }) => {
        const refresh = response.headers?.refresh;
        if (!refresh || !session) {
            return;
        }

        console.log(response.request.options.headers);
        log.info(`Access queue detected, waiting for ${refresh} seconds...`);
        // Debug output: cookies set by the queue page are present in the session here.
        console.log(session.getCookies(request.url));

        // Parse the leading integer of the header explicitly in base 10 and
        // never sleep for a NaN or negative duration.
        const refreshSecs = Number.parseInt(refresh, 10);
        const waitSecs = Number.isNaN(refreshSecs) ? 0 : Math.max(refreshSecs - 1, 0);
        await sleep(waitSecs * 1000);

        // The unique key must differ from the original request's key (so the
        // URL is not deduplicated) and must include the URL: a bare timestamp
        // string is shared by every URL re-enqueued within the same second.
        await addRequests([{ url: request.url, uniqueKey: `${request.url}#${Date.now()}` }]);
    },
});