Apify and Crawlee Official Forum

Updated 4 months ago

Async link parsing for faster results

I have previously used enqueueLinks with a selector to add more URLs to the queue, with good results - it's nearly instant on pages with around 100 links. I now need to modify the unique ID so I'm looping through the results on the page - even though the links are already loaded, this is very slow. Is there a way to do this faster, while still getting the additional attributes?

Plain Text
const processResults = async (locator: Locator) => {
  const queue: {
      [key: string]: {
          name: string | null
          address: string | null
      }
  } = {}
  for await (const result of await locator.all()) {
      try {
          const resultLinkLocator = result.locator(`a[aria-label]`)
          const addressShortLocator = result.locator(
              `span[aria-hidden]:has-text("·") + span:not([role="img"])`
          )
          const name = await resultLinkLocator.getAttribute(
              "aria-label",
              {
                  timeout: 5_000,
              }
          )
          log.info(`Result name: ${name}`)
          const address = await addressShortLocator.textContent({
              timeout: 5_000,
          })
          const url = await resultLinkLocator.getAttribute("href", {
              timeout: 5_000,
          })

          if (!url) {
              log.info(`No url found for result ${name}`)
              continue
          }
          queue[url] = {
              name,
              address,
          }
      } catch (e: any) {
          log.info(`Error queueing result. Error: ${e}`)
      }
  }
  return queue
}
const urls = Object.keys(linkQueue)
await enqueueLinks({
    label: "PLACE_DETAIL",
    urls: urls,
    transformRequestFunction: (request) => {
        request.uniqueKey = `${linkQueue[request.url].name}|${
            linkQueue[request.url].address ?? location
        }`
        return request
    },
    strategy: "same-domain",
    userData,
})
L
1 comment
What exactly is slow, do you have timers at specific points? 100 links is nothing. Perf problems only happen with crazy code usually
Add a reply
Sign up and join the conversation on Discord