Apify and Crawlee Official Forum

Updated 3 months ago

How can i use proxy with playwright apify

hi I'm trying to make a scraper and i don't now how to implement a proxy hosted by apify in my script i share you a code to see why i'm trying to do
k
M
2 comments
Plain Text
async def main() -> None:
    async with Actor:
        # Read the Actor input

        actor_input = await Actor.get_input() or {}
        start_urls = actor_input.get("start_urls", [])
        max_depth = actor_input.get("max_depth", 1)
        proxy_configuration = await Actor.create_proxy_configuration()
        proxy_url = await proxy_configuration.new_url()

        if not start_urls:
            Actor.log.info("No start URLs specified in Actor input, exiting...")
            await Actor.exit()

        # Enqueue the starting URLs in the default request queue
        default_queue = await Actor.open_request_queue()

        for start_url in start_urls:
            url = start_url.get("url")
            Actor.log.info(f"Enqueuing {url} ...")
            await default_queue.add_request({"url": url, "userData": {"depth": 0}})

        Actor.log.info("Launching Playwright...")
        async with async_playwright() as playwright:
            browser = await playwright.chromium.launch(headless=Actor.config.headless)
            context = await browser.new_context()

            while request := await default_queue.fetch_next_request():
                url = request["url"]
                depth = request["userData"]["depth"]

                Actor.log.info(f"Scraping {url} ...")

                Actor.log.info("Launching Playwright...")

                try:
                    page = await context.new_page()
                    await page.goto(url)
                    content = await page.content()

                    soup = BeautifulSoup(content, "html.parser")
                    content_seller = AmzonSellerInfosParser(url=url, soup=soup).parse()
                    await Actor.push_data(content_seller)

                except Exception:
                    Actor.log.exception(f"Cannot extract data from {url}.")
                finally:
                    await page.close()
Hi. I don't see in your code that you pass the proxy_url to playwright.

You can find out how to do this in the official playwright documentation - https://playwright.dev/python/docs/network#http-proxy
Add a reply
Sign up and join the conversation on Discord