from apify import Actor
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright


async def main() -> None:
    async with Actor:
        # Read the Actor input
        actor_input = await Actor.get_input() or {}
        start_urls = actor_input.get("start_urls", [])
        max_depth = actor_input.get("max_depth", 1)

        # Create an Apify proxy configuration and fetch a proxy URL
        # (not yet passed to the browser launch below)
        proxy_configuration = await Actor.create_proxy_configuration()
        proxy_url = await proxy_configuration.new_url()

        if not start_urls:
            Actor.log.info("No start URLs specified in Actor input, exiting...")
            await Actor.exit()

        # Enqueue the starting URLs in the default request queue
        default_queue = await Actor.open_request_queue()
        for start_url in start_urls:
            url = start_url.get("url")
            Actor.log.info(f"Enqueuing {url} ...")
            await default_queue.add_request({"url": url, "userData": {"depth": 0}})

        Actor.log.info("Launching Playwright...")
        async with async_playwright() as playwright:
            browser = await playwright.chromium.launch(headless=Actor.config.headless)
            context = await browser.new_context()

            # Process the queue until every enqueued request has been handled
            while request := await default_queue.fetch_next_request():
                url = request["url"]
                depth = request["userData"]["depth"]
                Actor.log.info(f"Scraping {url} ...")

                # Open the page before the try block so it always exists for cleanup
                page = await context.new_page()
                try:
                    await page.goto(url)
                    content = await page.content()
                    # Parse the rendered HTML and extract the seller information
                    soup = BeautifulSoup(content, "html.parser")
                    content_seller = AmzonSellerInfosParser(url=url, soup=soup).parse()
                    await Actor.push_data(content_seller)
                except Exception:
                    Actor.log.exception(f"Cannot extract data from {url}.")
                finally:
                    await page.close()
                    # Mark the request as handled so it is not fetched again
                    await default_queue.mark_request_as_handled(request)
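# For completeness, a minimal entrypoint sketch showing one way to invoke main().
# This is an assumption, not part of the original code: the Apify Python Actor
# template typically keeps main() in src/main.py and runs it from src/__main__.py.
import asyncio

from .main import main

if __name__ == "__main__":
    asyncio.run(main())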