import { CheerioCrawler, ProxyConfiguration, AutoscaledPool, SessionPool } from 'crawlee'; import { router } from './amazon.js'; import { router } from './ebay.js'; const searchKeywords = 'hydroflasks'; // Replace with desired search keywords const startUrls = [ { url: `https://www.amazon.com/s?k=${searchKeywords}`, label: 'AMAZON' }, { url: `https://www.ebay.com/sch/i.html?_nkw=${searchKeywords}`, label: 'EBAY' }, ]; const crawler = new CheerioCrawler({ useSessionPool: true, sessionPoolOptions: { maxPoolSize: 100 }, // Set to true if you want the crawler to save cookies per session, // and set the cookie header to request automatically (default is true). persistCookiesPerSession: true, // ...but also ensure the crawler never exceeds 250 requests per minute maxRequestsPerMinute: 250, // Define router to run crawl requestHandler: router }); export { crawler } await crawler.run(startUrls);
file:///C:/Users/haris/OneDrive/Documents/GitHub/crawleeScraper/my-crawler/src/main.js:3 import { router } from './ebay.js'; ^^^^^^ SyntaxError: Identifier 'router' has already been declared at ESMLoader.moduleStrategy (node:internal/modules/esm/translators:119:18) at ESMLoader.moduleProvider (node:internal/modules/esm/loader:468:14) at async link (node:internal/modules/esm/module_job:68:21)
import { createCheerioRouter } from 'crawlee'; import fs, { link } from 'fs'; import { crawler } from './main.js'; export const router = createCheerioRouter(); router.addHandler('AMAZON', async ({ $, crawler }) => { console.log('starting link scrape') // Scrape product links from search results page const productLinks = $('h2 a').map((_, el) => 'https://www.amazon.com' + $(el).attr('href')).get(); console.log(`Found ${productLinks.length} product links for Amazon`); console.log(productLinks) // Add each product link to request queue for (const link of productLinks) { const result = await crawler.addRequests([{ url: link, label: 'AMAZON_PRODUCT' }]) await result.waitForAllRequestsToBeAdded; } // Check if there are more pages to scrape const nextPageLink = $('a[title="Next"]').attr('href'); if (nextPageLink) { // Construct the URL for the next page const nextPageUrl = 'https://www.amazon.com' + nextPageLink; // Add the request for the next page const result = await crawler.addRequests([{ url: nextPageUrl, label: 'AMAZON' }]); await result.waitForAllRequestsToBeAdded; } });
router.addHandler('AMAZON_PRODUCT', async ({ $, request }) => { const productInfo = {}; productInfo.link = request.url; productInfo.storeName = 'Amazon'; productInfo.productTitle = $('span#productTitle').text().trim(); productInfo.productDescription = $('div#productDescription').text().trim(); productInfo.salePrice = $('span#priceblock_ourprice').text().trim(); productInfo.originalPrice = $('span.priceBlockStrikePriceString').text().trim(); productInfo.reviewScore = $('span#acrPopover').attr('title'); productInfo.shippingInfo = $('div#ourprice_shippingmessage').text().trim(); // Write product info to JSON file if (Object.keys(productInfo).length > 0) { const rawData = JSON.stringify(productInfo, null, 2); fs.appendFile('rawData.json', rawData, (err) => { if (err) throw err; }); } console.log(`Product info written to rawData.json for amazon`); });
import { createCheerioRouter } from 'crawlee'; import fs, { link } from 'fs'; import { crawler } from './main.js'; export const router = createCheerioRouter(); router.addHandler('EBAY', async ({ $, crawler }) => { console.log('starting link scrape') // Scrape product links from search results page const productLinks = $('a.item__info-link').map((_, el) => $(el).attr('href')).get(); console.log(`Found ${productLinks.length} product links for eBay`); // Add each product link to request queue for (const link of productLinks) { const result = await crawler.addRequests([{ url: link, label: 'EBAY_PRODUCT' }]) await result.waitForAllRequestsToBeAdded; } }); router.addHandler('EBAY_PRODUCt', async ({ $, request }) => { const productInfo = {}; productInfo.link = request.url; productInfo.storeName = 'eBay'; productInfo.productTitle = $('h3.s-item__title').text().trim(); productInfo.productDescription = $('div.a-section.a-spacing-small.span.a-size-base-plus').text().trim(); productInfo.salePrice = $('span.s-item__price').text().trim(); productInfo.originalPrice = $('span.s-item__price--original').text().trim(); productInfo.reviewScore = $('div.s-item__reviews').text().trim(); productInfo.shippingInfo = $('span.s-item__shipping').text().trim(); // Write product info to JSON file if (Object.keys(productInfo).length > 0) { const rawData = JSON.stringify(productInfo, null, 2); fs.appendFile('rawData.json', rawData, (err) => { if (err) throw err; }); } });
export const router = createCheerioRouter();
and another one in ebay.js export const router = createCheerioRouter();
export const router = createCheerioRouter();
in main.js