Hey y'all — basically, I'm trying to check whether the response is `application/pdf`; if it is, the navigation should time out immediately and, ideally, the request should be skipped (skipRequest).
async (crawlingContext, gotoOptions) => {
const { page, request, crawler } = crawlingContext
const queue = await crawler.getRequestQueue()
const crawler_dto = request.userData.crawler_dto
if (!request.url.endsWith('.pdf')) {
gotoOptions.waitUntil = 'networkidle2'
gotoOptions.timeout = 20000
await page.setBypassCSP(true)
await page.setExtraHTTPHeaders({
'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
})
await page.setViewport({ width: 1440, height: 900 })
}
page.on('response', async (page_response) => {
if (page_response.headers()['content-type'] === 'application/pdf') {
gotoOptions.timeout = 1
}
})
},