// Route all crawler traffic through the Apify "residential" proxy group.
const proxyConfiguration = await Actor.createProxyConfiguration({
    groups: ['residential'],
});

// Firefox-based Playwright crawler. Each page is opened in its own incognito
// context with randomized context options (viewport, locale, timezone, ...),
// and an init script overrides several navigator/performance properties
// before any page script runs.
const crawler = new PlaywrightCrawler({
    proxyConfiguration,
    navigationTimeoutSecs: 180,
    handleRequestTimeoutSecs: 180,
    launchContext: {
        useIncognitoPages: true,
        launcher: firefox,
    },
    browserPoolOptions: {
        useFingerprints: true,
        prePageCreateHooks: [
            // Mutates `pageOptions` in place; the browser pool passes the same
            // object on when it creates the page/context.
            (_pageId, _browserController, pageOptions = {}) => {
                const width = 1280 + Math.floor(Math.random() * 640);
                const height = 720 + Math.floor(Math.random() * 360);
                const locales = ['en-US', 'en-GB', 'fr-FR', 'de-DE', 'es-ES', 'it-IT', 'nl-NL', 'pt-BR'];
                const timezones = [
                    'America/New_York',
                    'Europe/London',
                    'Europe/Paris',
                    'Europe/Berlin',
                    'Europe/Amsterdam',
                    'Asia/Tokyo',
                    'Asia/Singapore',
                    'Australia/Sydney',
                ];
                Object.assign(pageOptions, {
                    locale: locales[Math.floor(Math.random() * locales.length)],
                    timezoneId: timezones[Math.floor(Math.random() * timezones.length)],
                    // Viewport and screen share one size so they cannot disagree.
                    viewport: { width, height },
                    screen: { width, height },
                    colorScheme: Math.random() > 0.5 ? 'light' : 'dark',
                    deviceScaleFactor: [1, 1.25, 1.5, 2][Math.floor(Math.random() * 4)],
                    // `isMobile` is deliberately NOT set: Playwright documents it as
                    // unsupported in Firefox, so randomly enabling it would make
                    // a fraction of page creations fail with this launcher.
                    hasTouch: Math.random() > 0.7,
                });
            },
        ],
    },
    preNavigationHooks: [
        async ({ page }) => {
            // The callback below is serialized and executed inside the page, so
            // everything it needs must be declared within it.
            await page.addInitScript(() => {
                const randomInt = (min: number, max: number) => Math.floor(Math.random() * (max - min + 1)) + min;

                // Roll every spoofed value ONCE per document. Re-rolling inside the
                // getters would make e.g. `navigator.platform` change between two
                // consecutive reads, which is itself trivially detectable.
                const spoofedPlugins = new Array(randomInt(3, 10)).fill({
                    name: ['PDF Viewer', 'Chrome PDF Viewer', 'WebKit built-in PDF'][Math.floor(Math.random() * 3)],
                    description: 'Portable Document Format',
                    filename: 'internal-pdf-viewer',
                });
                const spoofedPlatform = ['Win32', 'MacIntel', 'Linux x86_64', 'Linux aarch64'][Math.floor(Math.random() * 4)];
                const spoofedConcurrency = randomInt(4, 16);
                const spoofedDeviceMemory = [2, 4, 8, 16][Math.floor(Math.random() * 4)];
                const spoofedLanguages = ['en-US', 'en', 'es', 'fr'].slice(0, randomInt(1, 3));
                const spoofedTouchPoints = Math.random() > 0.7 ? randomInt(1, 5) : 0;

                Object.defineProperties(Navigator.prototype, {
                    webdriver: { get: () => undefined },
                    plugins: { get: () => spoofedPlugins },
                    platform: { get: () => spoofedPlatform },
                    hardwareConcurrency: { get: () => spoofedConcurrency },
                    // NOTE(review): `deviceMemory` looks Chromium-specific; confirm that
                    // exposing it under the Firefox launcher is intended.
                    deviceMemory: { get: () => spoofedDeviceMemory },
                    languages: { get: () => spoofedLanguages },
                    maxTouchPoints: { get: () => spoofedTouchPoints },
                });

                interface ExtendedWindow extends Window {
                    cdc_adoQpoasnfa76pfcZLmcfl_Array?: unknown;
                    cdc_adoQpoasnfa76pfcZLmcfl_Promise?: unknown;
                    cdc_adoQpoasnfa76pfcZLmcfl_Symbol?: unknown;
                }

                // NOTE(review): these `cdc_*` globals look like ChromeDriver artifacts;
                // with the Firefox launcher the deletes are presumably no-ops - confirm.
                delete (window as ExtendedWindow).cdc_adoQpoasnfa76pfcZLmcfl_Array;
                delete (window as ExtendedWindow).cdc_adoQpoasnfa76pfcZLmcfl_Promise;
                delete (window as ExtendedWindow).cdc_adoQpoasnfa76pfcZLmcfl_Symbol;

                // Add random jitter to the timings reported by performance.getEntries().
                const originalGetEntries = Performance.prototype.getEntries;
                Performance.prototype.getEntries = function () {
                    const entries = originalGetEntries.call(this);
                    return entries.map((entry) => ({
                        // PerformanceEntry fields are prototype accessors, so a plain
                        // `{ ...entry }` would copy nothing (name, entryType, ... lost).
                        // toJSON() yields a plain object that carries them.
                        ...entry.toJSON(),
                        startTime: entry.startTime + Math.random() * 50,
                        duration: entry.duration + Math.random() * 20,
                    }));
                };
            });
        },
    ],
    requestHandler: async ({ page }) => {
        await handleRequest(page);
    },
});
# ---------- Stage 1: builder ----------
# Base image for the build stage. The available Apify images are described at
# https://crawlee.dev/docs/guides/docker-images (any Docker Hub image works too).
FROM apify/actor-node-playwright-firefox:20 AS builder

# Print the versions of the packages that ship with the base image.
RUN npm ls crawlee apify puppeteer playwright

# Copy only the package manifests first so the dependency layer
# can be reused from the Docker cache when sources change.
COPY --chown=myuser package*.json ./

# Install every dependency (dev included); auditing is skipped for speed.
RUN npm install --include=dev --audit=false

# Bring in the sources, owned by the user defined in the base image.
COPY --chown=myuser . ./

# Compile the project.
RUN npm run build

# ---------- Stage 2: final image ----------
FROM apify/actor-node-playwright-firefox:20

# Print the versions of the packages that ship with the base image.
RUN npm ls crawlee apify puppeteer playwright

# Copy only the package manifests first so the dependency layer
# can be reused from the Docker cache when sources change.
COPY --chown=myuser package*.json ./

# Install production NPM packages only (no dev or optional dependencies)
# to keep the image small, then log tool versions and drop the npm cache.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Take the compiled JS output from the builder stage.
COPY --from=builder --chown=myuser /home/myuser/dist ./dist

# Copy the remaining files. Because this happens after the NPM install,
# rebuilds triggered by source-only changes stay fast.
COPY --chown=myuser . ./

# Start the actor. The XVFB start script is only needed for headful
# browsers and can be removed for a small performance gain.
CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent
apify run
? You could also try the RESIDENTIAL5
proxy tier, which works better but is more expensive. apify run
and a headless browser. If I change to RESIDENTIAL5 proxies, it no longer works locally, but with just RESIDENTIAL it does work locally. In neither case does it work on the Apify platform. If you'd like, I can add you to the GitHub repository with the code so you can have a look.