/**
 * KOTN_DETAIL handler: collects product links from a listing page, stores them
 * in the dataset and enqueues each one on the `kotnPw` crawler under the
 * `KOTN_PRODUCT` label.
 *
 * Steps:
 *  1. Scroll the page so lazily loaded items are rendered.
 *  2. Parse the resulting DOM and gather every same-site link whose path
 *     contains `/products`.
 *  3. Push the de-duplicated list to the dataset and enqueue it.
 *  4. Start `kotnPw` the first time any links have been counted.
 */
kotnRouter.addHandler('KOTN_DETAIL', async ({ log, page, parseWithCheerio }) => {
    log.info(`Scraping product URLs`);

    // Scroll BEFORE taking the DOM snapshot: parsing first would only ever see
    // the links present on initial load and miss everything added by scrolling.
    // NOTE(review): `maxScrollHeight: 0` is kept from the original call; it is
    // presumably "no height limit" — confirm against the infiniteScroll docs.
    await infiniteScroll(page, {
        maxScrollHeight: 0,
    });

    const $ = await parseWithCheerio();

    const baseUrl = 'https://www.kotn.com';
    // A Set de-duplicates while collecting, so no second pass is needed.
    const productUrls = new Set<string>();

    $('a').each((_, el) => {
        const href = $(el).attr('href');
        if (!href) {
            return;
        }

        // Resolve against the site origin instead of concatenating strings, so
        // relative hrefs (with or without a leading slash) and absolute hrefs
        // are both handled; malformed hrefs are skipped.
        let resolved: URL;
        try {
            resolved = new URL(href, baseUrl);
        } catch {
            return;
        }

        // Keep only links on the shop itself that point at a product path.
        if (resolved.origin === baseUrl && resolved.pathname.includes('/products')) {
            productUrls.add(resolved.href);
        }
    });

    const uniqueProductUrls = [...productUrls];

    // Push unique URLs to the dataset
    await Dataset.pushData({
        urls: uniqueProductUrls,
    });

    // Enqueue everything in one batched call rather than one call per link.
    if (uniqueProductUrls.length > 0) {
        await kotnPw.addRequests(
            uniqueProductUrls.map((url) => ({ url, label: 'KOTN_PRODUCT' })),
        );
    }

    linksCount += uniqueProductUrls.length;

    console.log(uniqueProductUrls);
    console.log(`Total product links scraped so far: ${linksCount}`);

    // Run the kotnPw crawler once after pushing the first product requests.
    // The running total equals this page's count only when nothing had been
    // counted before this invocation.
    // NOTE(review): awaiting another crawler's run() inside a request handler
    // keeps this handler pending until that crawler finishes — confirm this is
    // intended and does not exceed the handler timeout.
    if (linksCount === uniqueProductUrls.length) {
        await kotnPw.run();
    }
});
Pass `infiniteScroll` to a `Promise.all` or `Promise.race` in order for it to keep scrolling while you run another function beside it in the same `Promise.all` or `Promise.race`.
Alternatively, use the `infiniteScroll` function, and inside the `stopScrollCallback` option, you can collect the products and stop it once you don't find more.