async def main() -> None:
    """Send one POST request through a BeautifulSoupCrawler and store the request dump.

    The handler logs the URL being processed and pushes the JSON dump of the
    request object (``context.request.model_dump_json()``) as data.
    """
    target_url = (
        "https://www.MY_URL.com"
        "?gridFilterType=0"
        "&homeAwayFilterType=0"
        "&sortBy=0"
        "&nearbyGridRadius=50"
        "&venueIdFilterType=0"
        "&eventViewType=0"
        "&opponentCategoryId=0"
        "&pageIndex=1"
        "&method=GetFilteredEvents"
        # NOTE(review): there is no `&` between `categoryId=4555` and
        # `genreId=undefined`, so both travel as a single `categoryId` value.
        "&categoryId=4555genreId=undefined"
        "&eventCountryType=0"
        "&fromPrice=undefined"
        "&toPrice=undefined"
    )

    async with Actor:
        crawler = BeautifulSoupCrawler()
        start_request = Request.from_url(url=target_url, method="POST")

        @crawler.router.default_handler
        async def default_handler(context: BeautifulSoupCrawlingContext) -> None:
            context.log.info(f"Processing {context.request.url}")
            request_dump = context.request.model_dump_json()
            await context.push_data(request_dump)

        # Run the crawler
        await crawler.run([start_request])
{ "url": "MY_URL", "unique_key": "MY_URL", "method": "POST", "headers": {}, "query_params": {}, "payload": null, "data": {}, "user_data": { "__crawlee": { "state": 3 } }, "retry_count": 0, "no_retry": false, "loaded_url": "MY_URL", "handled_at": null, "id": "iEYRVLtHdfdR7s6", "json_": null, "order_no": null }
httpbin.org/post
{ "args": { "categoryId": "4555genreId=undefined", "eventCountryType": "0", "eventViewType": "0", "fromPrice": "undefined", "gridFilterType": "0", "homeAwayFilterType": "0", "method": "GetFilteredEvents", "nearbyGridRadius": "50", "opponentCategoryId": "0", "pageIndex": "1", "sortBy": "0", "toPrice": "undefined", "venueIdFilterType": "0" }, "data": "", "files": {}, "form": {}, "headers": { "Accept": "*/*", "Accept-Encoding": "gzip, deflate, br", "Content-Length": "0", "Host": "httpbin.org", "User-Agent": "python-httpx/0.27.2", "X-Amzn-Trace-Id": "Root=1-67100e24-37616e605f9cf31e5538556b" }, "json": null, "origin": "91.240.96.149", "url": "https://httpbin.org/post?gridFilterType=0&homeAwayFilterType=0&sortBy=0&nearbyGridRadius=50&venueIdFilterType=0&eventViewType=0&opponentCategoryId=0&pageIndex=1&method=GetFilteredEvents&categoryId=4555genreId%3Dundefined&eventCountryType=0&fromPrice=undefined&toPrice=undefined" }
In `args`, `categoryId` comes back as `4555genreId=undefined`: the `&` is missing before the genreId parameter. Corrected URL:
url = "https://www.MY_URL.com?gridFilterType=0&homeAwayFilterType=0&sortBy=0&nearbyGridRadius=50&venueIdFilterType=0&eventViewType=0&opponentCategoryId=0&pageIndex=1&method=GetFilteredEvents&categoryId=4555&genreId=undefined&eventCountryType=0&fromPrice=undefined&toPrice=undefined"
async def main() -> None:
    """Send one POST request through a BeautifulSoupCrawler and store the request dump.

    The handler logs the URL being processed and pushes
    ``context.request.model_dump_json()`` as data, i.e. the serialized
    request object rather than anything read from the response.
    """
    events_url = (
        "https://www.viagogo.com/Concert-Tickets/Pop-Rock/Dance-Pop/Shakira-Tickets"
        "?gridFilterType=0"
        "&homeAwayFilterType=0"
        "&sortBy=0"
        "&nearbyGridRadius=50"
        "&venueIdFilterType=0"
        "&eventViewType=0"
        "&opponentCategoryId=0"
        "&pageIndex=1"
        "&method=GetFilteredEvents"
        "&categoryId=4555"
        "&radiusFrom=80467"
        "&radiusTo=null"
        "&from=1970-01-01T00%3A00%3A00.000Z"
        "&to=9999-12-30T23%3A00%3A00.000Z"
        "&lat=39.044"
        "&lon=-77.488"
        "&genreId=undefined"
        "&eventCountryType=0"
        "&fromPrice=undefined"
        "&toPrice=undefined"
    )

    async with Actor:
        crawler = BeautifulSoupCrawler()

        @crawler.router.default_handler
        async def default_handler(context: BeautifulSoupCrawlingContext) -> None:
            context.log.info(f"Processing {context.request.url}")
            serialized_request = context.request.model_dump_json()
            await context.push_data(serialized_request)

        post_request = Request.from_url(url=events_url, method="POST")
        await crawler.run([post_request])
{ "url": "https://www.viagogo.com/Concert-Tickets/Pop-Rock/Dance-Pop/Shakira-Tickets?gridFilterType=0&homeAwayFilterType=0&sortBy=0&nearbyGridRadius=50&venueIdFilterType=0&eventViewType=0&opponentCategoryId=0&pageIndex=1&method=GetFilteredEvents&categoryId=4555&radiusFrom=80467&radiusTo=null&from=1970-01-01T00%3A00%3A00.000Z&to=9999-12-30T23%3A00%3A00.000Z&lat=39.044&lon=-77.488&genreId=undefined&eventCountryType=0&fromPrice=undefined&toPrice=undefined", "unique_key": "https://www.viagogo.com/concert-tickets/pop-rock/dance-pop/shakira-tickets?categoryid=4555&eventcountrytype=0&eventviewtype=0&from=1970-01-01t00%3a00%3a00.000z&fromprice=undefined&genreid=undefined&gridfiltertype=0&homeawayfiltertype=0&lat=39.044&lon=-77.488&method=getfilteredevents&nearbygridradius=50&opponentcategoryid=0&pageindex=1&radiusfrom=80467&radiusto=null&sortby=0&to=9999-12-30t23%3a00%3a00.000z&toprice=undefined&venueidfiltertype=0", "method": "POST", "headers": {}, "query_params": {}, "payload": null, "data": {}, "user_data": { "__crawlee": { "state": 3 } }, "retry_count": 0, "no_retry": false, "loaded_url": "https://www.viagogo.com/Concert-Tickets/Pop-Rock/Dance-Pop/Shakira-Tickets?gridFilterType=0&homeAwayFilterType=0&sortBy=0&nearbyGridRadius=50&venueIdFilterType=0&eventViewType=0&opponentCategoryId=0&pageIndex=1&method=GetFilteredEvents&categoryId=4555&radiusFrom=80467&radiusTo=null&from=1970-01-01T00%3A00%3A00.000Z&to=9999-12-30T23%3A00%3A00.000Z&lat=39.044&lon=-77.488&genreId=undefined&eventCountryType=0&fromPrice=undefined&toPrice=undefined", "handled_at": null, "id": "iEYRVLtHdfdR7s6", "json_": null, "order_no": null }
Using `httpx`:
resp = httpx.post(url)
print(resp.json())
> output: {'items': [{'eventId': 153433356, 'name': 'Shakira', 'url': 'https://www.viagogo.com/Concert-Tickets/Pop-Rock/Dance-Pop/Shakira-Tickets/E-153433356', 'dayOfWeek': 'Wed', ... }
context.request.model_dump_json()
# - as you can see, it outputs the Request metadata, which does not include the server response
async def main() -> None:
    """Send one POST request through a BeautifulSoupCrawler and store the first ``<p>`` text.

    The handler logs the URL being processed, looks up the first ``<p>``
    element in the parsed response (``context.soup``) and pushes its text as
    data. When the parsed document has no ``<p>`` element, a warning is
    logged and nothing is pushed.
    """
    async with Actor:
        crawler = BeautifulSoupCrawler()
        url = "https://www.viagogo.com/Concert-Tickets/Pop-Rock/Dance-Pop/Shakira-Tickets?gridFilterType=0&homeAwayFilterType=0&sortBy=0&nearbyGridRadius=50&venueIdFilterType=0&eventViewType=0&opponentCategoryId=0&pageIndex=1&method=GetFilteredEvents&categoryId=4555&radiusFrom=80467&radiusTo=null&from=1970-01-01T00%3A00%3A00.000Z&to=9999-12-30T23%3A00%3A00.000Z&lat=39.044&lon=-77.488&genreId=undefined&eventCountryType=0&fromPrice=undefined&toPrice=undefined"
        initial_req = Request.from_url(
            method="POST",
            url=url,
        )

        @crawler.router.default_handler
        async def default_handler(context: BeautifulSoupCrawlingContext) -> None:
            context.log.info(f"Processing {context.request.url}")

            # find() yields None when nothing matches; reading `.text` on it
            # would raise AttributeError, so handle that case explicitly.
            paragraph = context.soup.find("p")
            if paragraph is None:
                context.log.warning(f"No <p> element found in {context.request.url}")
                return

            await context.push_data(paragraph.text)

        await crawler.run([initial_req])
import json

from apify import Actor
from crawlee import Request
from crawlee.http_crawler import HttpCrawler, HttpCrawlingContext


async def main() -> None:
    """Send one POST request through an HttpCrawler and store the parsed JSON response.

    The handler logs the URL being processed, reads the response body,
    decodes it with ``json.loads`` and pushes the decoded object as data.
    """
    async with Actor:
        crawler = HttpCrawler()

        @crawler.router.default_handler
        async def default_handler(context: HttpCrawlingContext) -> None:
            context.log.info(f"Processing {context.request.url}")
            # Reading the body and decoding it here gives the same result as
            # `response.json()` after doing `response = httpx.post(url)`.
            raw_body = context.http_response.read()
            decoded = json.loads(raw_body)
            await context.push_data(decoded)

        target_url = "https://www.viagogo.com/Concert-Tickets/Pop-Rock/Dance-Pop/Shakira-Tickets?gridFilterType=0&homeAwayFilterType=0&sortBy=0&nearbyGridRadius=50&venueIdFilterType=0&eventViewType=0&opponentCategoryId=0&pageIndex=1&method=GetFilteredEvents&categoryId=4555&radiusFrom=80467&radiusTo=null&from=1970-01-01T00%3A00%3A00.000Z&to=9999-12-30T23%3A00%3A00.000Z&lat=39.044&lon=-77.488&genreId=undefined&eventCountryType=0&fromPrice=undefined&toPrice=undefined"
        post_request = Request.from_url(url=target_url, method="POST")

        await crawler.run([post_request])