// Read the cookies that apply to the URL the page is currently on and keep
// them in `cookiesStore` (declared outside this snippet).
const currentPageUrl = page.url();
cookiesStore = await page.cookies(currentPageUrl);
DEBUG Error while disabling request interception {"error":{"name":"TargetCloseError","message":"Protocol error (Network.setCacheDisabled): Target closed","stack":"TargetCloseError: Protocol error (Network.setCacheDisabled): Target closed\n at CallbackRegistry.clear (project/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:138:36)\n at CDPSessionImpl._onClosed (project/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:451:25)\n at Connection.onMessage (project/node_modules/puppeteer-core/lib/cjs/puppeteer/common/Connection.js:248:25)\n at WebSocket.<anonymous> (project/node_modules/puppeteer-core/lib/cjs/puppeteer/common/NodeWebSocketTransport.js:52:32)\n at callListener (project/node_modules/ws/lib/event-target.js:290:14)\n at WebSocket.onMessage (project/node_modules/ws/lib/event-target.js:209:9)\n at WebSocket.emit (node:events:365:28)\n at Receiver.receiverOnMessage (project/node_modules/ws/lib/websocket.js:1184:20)\n at Receiver.emit (node:events:365:28)\n at Receiver.dataMessage (project/node_modules/ws/lib/receiver.js:541:14)"}}
WARN CheerioCrawler:AutoscaledPool:Snapshotter: Memory is critically overloaded. Using 1174 MB of 750 MB (157%). Consider increasing available memory.
/**
 * Resource types whose requests are answered with a stub instead of being
 * fetched. The values have to match what Puppeteer's
 * `HTTPRequest.resourceType()` reports, which are resource categories
 * ('image', 'media', 'font', ...) and not file extensions. The previous list
 * ('webp', 'svg', 'mp4', 'jpeg', 'gif', 'avif') could never match, so only
 * fonts were actually being blocked.
 */
const blockedResourceTypes = ['image', 'media', 'font'];

/**
 * Puppeteer-based crawler that routes pages through `router` and stubs out
 * heavy resources (images, media, fonts) before navigation.
 */
const crawler = new PuppeteerCrawler({
    launchContext: {
        launchOptions: {
            headless: false,
            devtools: true,
            defaultViewport: {
                width: 1920,
                height: 6000,
            },
            args: [
                '--disable-dev-shm-usage',
            ],
        },
        useIncognitoPages: true,
    },
    proxyConfiguration,
    requestHandler: router,
    maxConcurrency: 16,
    maxRequestRetries: 15,
    maxRequestsPerMinute: 2,
    navigationTimeoutSecs: 120,
    useSessionPool: true,
    failedRequestHandler({ request }) {
        log.debug(`Request ${request.url} failed 15 times.`);
    },
    preNavigationHooks: [
        async ({ addInterceptRequestHandler }) => {
            await addInterceptRequestHandler((request) => {
                // Answer blocked resources locally with a dummy 200 response so
                // the page does not see a failed request, but nothing is downloaded.
                if (blockedResourceTypes.includes(request.resourceType())) {
                    return request.respond({
                        status: 200,
                        body: 'useless shit',
                    });
                }
                return request.continue();
            });
        },
    ],
});
// Build a persisted request list from `allUrls`; the same key is used for the
// list name and for its persisted state.
const requestList = await RequestList.open('My-ReqList', allUrls, {
    persistStateKey: 'My-ReqList',
});
console.log(requestList.length());

// HTTP-only crawler fed from the request list above, with pages handled by `router`.
const crawler = new CheerioCrawler({
    requestList,
    proxyConfiguration,
    requestHandler: router,
    minConcurrency: 32,
    maxConcurrency: 256,
    maxRequestRetries: 20,
    navigationTimeoutSecs: 6,
    loggingInterval: 30,
    useSessionPool: true,
    // Invoked once a request has used up all of its retries.
    failedRequestHandler: ({ request }) => {
        log.debug(`Request ${request.url} failed 20 times.`);
    },
});

await crawler.run();
Cannot find module 'crawlee'. Did you mean to set the 'moduleResolution' option to 'nodenext', or to add aliases to the 'paths' option?ts(2792)
// Click every "next page" element and enqueue the navigations it triggers.
// `RequestQueue.open()` returns a Promise, so it has to be awaited: passing the
// pending Promise fails option validation because it has no
// `fetchNextRequest` / `addRequest` methods.
await utils.puppeteer.enqueueLinksByClickingElements({
    page,
    requestQueue: await RequestQueue.open(),
    selector: 'li.pagination_next',
    label: 'category',
    forefront: true,
});
Reclaiming failed request back to the list or queue. Expected property object `requestQueue` to have keys `["fetchNextRequest","addRequest"]` in object `options`
> node src/main.js DEBUG CheerioCrawler:SessionPool: No 'persistStateKeyValueStoreId' options specified, this session pool's data has been saved in the KeyValueStore with the id: ee911a9c-b90e-412e-af5b-a470b0172ba8 INFO CheerioCrawler: Starting the crawl ERROR Memory snapshot failed. Error: spawn ps ENOENT at ChildProcess._handle.onexit (node:internal/child_process:283:19) at onErrorNT (node:internal/child_process:476:16) at process.processTicksAndRejections (node:internal/process/task_queues:82:21) DEBUG CheerioCrawler:SessionPool: Persisting state {"persistStateKey":"SDK_SESSION_POOL_STATE"} DEBUG Statistics: Persisting state {"persistStateKey":"SDK_CRAWLER_STATISTICS_0"} DEBUG CheerioCrawler:SessionPool: Persisting state {"persistStateKey":"SDK_SESSION_POOL_STATE"} DEBUG Statistics: Persisting state {"persistStateKey":"SDK_CRAWLER_STATISTICS_0"} DEBUG Statistics: Persisting state {"persistStateKey":"SDK_CRAWLER_STATISTICS_0"} node:internal/errors:490 ErrorCaptureStackTrace(err); ^ Error: spawn ps ENOENT at ChildProcess._handle.onexit (node:internal/child_process:283:19) at onErrorNT (node:internal/child_process:476:16) at process.processTicksAndRejections (node:internal/process/task_queues:82:21) { errno: -2, code: 'ENOENT', syscall: 'spawn ps', path: 'ps', spawnargs: [ '-A', '-o', 'ppid,pid,stat,rss,comm' ] } Node.js v18.16.0
if (request.loadedUrl === 'url-from-where-i-get-cookies'){ goodCokies = session.getCookies('url-from-where-i-get-cookies') await crawler.addRequests(['url-where-i-need-cookies']) return } await page.setCookie(goodCokies)
Reclaiming failed request back to the list or queue. Protocol error (Network.deleteCookies): Invalid parameters Failed to deserialize params.name - BINDINGS: mandatory field missing at position 2081
// Click every colour-switch link and enqueue the navigations it triggers.
// `page` and `requestQueue` are required options; without them the call is
// rejected during option validation before any element is clicked.
// NOTE(review): assumes `page` is the Puppeteer page from the surrounding
// request handler and that `RequestQueue` is imported in this file - confirm.
await puppeteerClickElements.enqueueLinksByClickingElements({
    page,
    requestQueue: await RequestQueue.open(),
    selector: 'a.js-color-change',
    forefront: true,
});
Reclaiming failed request back to the list or queue. Expected property `page` to be of type `object` but received type `undefined` Expected object `page` to have keys `["goto","evaluate"]` in object `options`