return async (context) => { if (context.request.label == requestLabels.article) { context.page.on('request', async (req) => { if (req.resourceType() == 'image') { const response = await req.response(); // extra processing and save to cloud
req.response()
can be collected. This causes an error, since the browser context closes along with the main request handler. My question is: how can I best get around this? One idea I had was to put a promise in the page's userData that resolves when the page has no outstanding images. After reading the docs, however, I'm not sure this is possible, since userData needs to be serialisable. Has anyone else encountered this type of issue, and how did they get around it? { "service": "AutoscaledPool", "time": "2024-10-30T16:42:17.049Z", "id": "cae4950d568a4b8bac375ffa5a40333c", "jobId": "9afee408-42bf-4194-b17c-9864db707e5c", "currentConcurrency": "4", "desiredConcurrency": "5", "systemStatus": "{\"isSystemIdle\":true,\"memInfo\":{\"isOverloaded\":false,\"limitRatio\":0.2,\"actualRatio\":0},\"eventLoopInfo\":{\"isOverloaded\":false,\"limitRatio\":0.6,\"actualRatio\":0},\"cpuInfo\":{\"isOverloaded\":false,\"limitRatio\":0.4,\"actualRatio\":0},\"clientInfo\":{\"isOverloaded\":false,\"limitRatio\":0.3,\"actualRatio\":0}}" }
{ "rejection": "true", "date": "Wed Oct 30 2024 16:42:38 GMT+0000 (Coordinated Universal Time)", "process": "{\"pid\":1,\"uid\":997,\"gid\":997,\"cwd\":\"/home/myuser\",\"execPath\":\"/usr/local/bin/node\",\"version\":\"v22.9.0\",\"argv\":[\"/usr/local/bin/node\",\"/home/myuser/FIDO-Scraper-Discovery\"],\"memoryUsage\":{\"rss\":337043456,\"heapTotal\":204886016,\"heapUsed\":168177928,\"external\":30148440,\"arrayBuffers\":14949780}}", "os": "{\"loadavg\":[3.08,3.38,3.68],\"uptime\":312222.44}", "stack": "response.headerValue: Target page, context or browser has been closed\n at Page.<anonymous> (/home/myuser/FIDO-Scraper-Discovery/dist/articleImagesPreNavHook.js:15:60)" }
azure:service-bus:receiver:warning [connection-1|streaming:discovery-8ffea0b6-f055-c04e-88ae-f31f039f2c24] Abandoning the message with id '656b7051a08b4b759087c40d0ecef687' on the receiver 'discovery-8ffea0b6-f055-c04e-88ae-f31f039f2c24' since an error occured: browserType.launch: Executable doesn't exist at /home/myuser/pw-browsers/chromium-1129/chrome-linux/chrome ╔═════════════════════════════════════════════════════════════════════════╗ ║ Looks like Playwright Test or Playwright was just installed or updated. ║ ║ Please run the following command to download new browsers: ║ ║ ║ ║ npx playwright install ║ ║ ║ ║ <3 Playwright Team ║ ╚═════════════════════════════════════════════════════════════════════════╝
serve-file
and file
changes regularly. chromium.use(stealthPlugin()); const router = createPlaywrightRouter(); router.addHandler( requestLabels.SPIDER, spiderDiscoveryHandlerFactory(container), ); router.addHandler(requestLabels.ARTICLE, articleHandlerFactory(container)); const config = new Configuration({ storageClient: new MemoryStorage({ localDataDirectory: `./storage/${message.messageId}`, writeMetadata: true, persistStorage: true, }), persistStateIntervalMillis: 5000, persistStorage: true, purgeOnStart: false, headless: false, }); const crawler = new PlaywrightCrawler( { launchContext: { launcher: chromium, }, requestHandler: router, errorHandler: (_request, error) => { logger.error(`${error.name}\n${error.message}`); }, maxRequestsPerCrawl: body.config.maxRequests > 0 ? body.config.maxRequests : undefined, useSessionPool: true, persistCookiesPerSession: true, }, config, );
{ "level": "info", "service": "AutoscaledPool", "message": "state", "id": "5b83448e57d74571921de06df2d980f2", "jobId": "testPayload4", "currentConcurrency": 1, "desiredConcurrency": 1, "systemStatus": { "isSystemIdle": false, "memInfo": { "isOverloaded": true, "limitRatio": 0.2, "actualRatio": 1 }, "eventLoopInfo": { "isOverloaded": false, "limitRatio": 0.6, "actualRatio": 0.019 }, "cpuInfo": { "isOverloaded": false, "limitRatio": 0.4, "actualRatio": 0 }, "clientInfo": { "isOverloaded": false, "limitRatio": 0.3, "actualRatio": 0 } } }
{ "level": "warning", "service": "Snapshotter", "message": "Memory is critically overloaded. Using 7164 MB of 6065 MB (118%). Consider increasing available memory.", "id": "5b83448e57d74571921de06df2d980f2", "jobId": "testPayload4" }