Yeah, I can see that won't work for you.
I would use some sort of external storage, such as a global dictionary or some kind of class-level storage, where your results would be aggregated.
Here's a simple code sample that implements this:
# Shared in-memory aggregate: one dict per item URL, filled in by the handlers
# below and removed again once the item's data has been pushed.
storage: dict = {}
@router.default_handler
async def request_handler(context: HttpCrawlingContext) -> None:
    """Open an empty aggregate for the item and enqueue its ``ITEM`` request.

    The item URL is used both as the key into ``storage`` and as the
    ``item_url`` value carried in the new request's ``user_data``.
    """
    context.log.info(f'default_handler is processing {context.request.url}')

    item_url = "https://httpbin.org/get?a=item"
    # Create the aggregate before the request is queued so later handlers
    # can look it up by the same key.
    storage[item_url] = {}

    item_request = Request.from_url(
        url=item_url,
        label='ITEM',
        user_data={"item_url": item_url},
    )
    await context.add_requests([item_request])
@router.handler('ITEM')
async def item_handler(context: HttpCrawlingContext) -> None:
    """Record how many tabs the item has and enqueue one ``TAB`` request each.

    Writes two counters into the item's entry in ``storage``: ``all_tabs``
    (number of tab URLs generated here) and ``processed_tabs`` (starts at 0).
    Every tab request carries the item URL in ``user_data["item_url"]``.
    """
    context.log.info(f'item_handler is processing {context.request.url}')

    item_url = context.request.user_data["item_url"]
    tab_urls = [f"https://httpbin.org/get?tab{i}={i}" for i in range(11)]

    # Set the counters before any tab request is queued.
    entry = storage[item_url]
    entry["all_tabs"] = len(tab_urls)
    entry["processed_tabs"] = 0

    tab_requests = []
    for tab_url in tab_urls:
        tab_requests.append(
            Request.from_url(
                url=tab_url,
                label='TAB',
                user_data={"item_url": item_url},
            )
        )
    await context.add_requests(tab_requests)
@router.handler('TAB')
async def tab_handler(context: HttpCrawlingContext) -> None:
    """Merge one tab's response into its item's aggregate; flush when complete.

    The response body is parsed as JSON and every entry of its ``"args"``
    mapping is copied into ``storage[item_url]``. When the number of processed
    tabs reaches the ``all_tabs`` count stored on the entry, the aggregate
    (minus the two bookkeeping counters) is pushed with ``context.push_data``
    and the entry is removed from ``storage``.

    Raises:
        KeyError: if ``storage`` has no entry (or no counters) for the item
            URL, or the parsed response has no ``"args"`` key.
    """
    context.log.info(f'tab_handler is processing {context.request.url}')
    url = context.request.user_data["item_url"]
    # NOTE(review): some crawlee releases may expose ``read()`` as a coroutine
    # that must be awaited - confirm against the installed version.
    data = json.loads(context.http_response.read())

    entry = storage[url]
    entry.update(data["args"])
    entry["processed_tabs"] += 1

    if entry["processed_tabs"] < entry["all_tabs"]:
        return

    # Build the output as a separate dict instead of deleting the counters
    # from the shared entry: if the push below raises, the aggregate is still
    # intact. The ``<`` guard above (rather than ``!=``) lets a re-run of the
    # last tab flush again - presumably the crawler retries a handler that
    # raised; confirm against its retry settings.
    bookkeeping = ("processed_tabs", "all_tabs")
    result = {key: value for key, value in entry.items() if key not in bookkeeping}
    await context.push_data(result)
    del storage[url]