Shared Conversation
Cheerio Web Scraping
Feb 8, 2026 08:54 PM
gemini · gemini-3-flash-preview
Feb 8, 2026 08:54 PM
Feb 8, 2026 08:55 PM
gemini · gemini-3-flash-preview
Feb 8, 2026 08:55 PM
// Setup (run once in a shell):
//   npm init -y
//   npm install axios cheerio

const axios = require('axios');
const cheerio = require('cheerio');

/**
 * Downloads the books.toscrape.com front page, extracts one record per
 * `.product_pod` element and prints the resulting array with console.log.
 *
 * Each record has the shape `{ title, price, availability, link }`, where
 * `link` is the book's href resolved against the page URL, or `null` when the
 * anchor carries no href attribute.
 *
 * Any error thrown while fetching or parsing is caught and reported on
 * stderr; the function itself never rejects.
 *
 * @returns {Promise<void>}
 */
async function scrapeBooks() {
  try {
    // 1. Fetch the HTML from the URL
    const url = 'https://books.toscrape.com/';
    const { data } = await axios.get(url);

    // 2. Load the HTML into cheerio
    const $ = cheerio.load(data);

    // 3. Create an array to store our results
    const books = [];

    // 4. Use selectors to find the data you want
    // In this site, each book is inside an <article class="product_pod">
    $('.product_pod').each((index, element) => {
      const card = $(element);
      // The title and the href live on the same anchor, so look it up once.
      const anchor = card.find('h3 a');
      const title = anchor.attr('title');
      const price = card.find('.price_color').text();
      const availability = card.find('.availability').text().trim();
      const href = anchor.attr('href');

      books.push({
        title,
        price,
        availability,
        // Resolve the href against the page URL instead of concatenating
        // strings, so relative and absolute hrefs both yield a valid URL.
        link: href === undefined ? null : new URL(href, url).href,
      });
    });

    // 5. Output the results
    console.log(books);
  } catch (error) {
    console.error(`Error fetching the page: ${error.message}`);
  }
}

scrapeBooks();

// Variant of the request inside scrapeBooks() that also sends a browser-like
// User-Agent header. Swap it in for the plain `axios.get(url)` call:
//
//   const { data } = await axios.get(url, {
//     headers: {
//       'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
//     }
//   });

// Run the script with:
//   node scrape.js

const webRequest = require('axios');
const htmlParser = require('cheerio');

/**
 * Same scrape as scrapeBooks(), written with different identifiers and
 * different output keys: each record is `{ label, cost, inStock, source }`.
 *
 * `source` is the book's href resolved against the page URL, or `null` when
 * the anchor carries no href attribute. The collected array is printed with
 * console.log; failures are caught and reported on stderr.
 *
 * @returns {Promise<void>}
 */
async function collectSiteData() {
  try {
    const targetUrl = 'https://books.toscrape.com/';
    const serverResponse = await webRequest.get(targetUrl);
    const reader = htmlParser.load(serverResponse.data);
    const finalResults = [];

    reader('.product_pod').each((idx, box) => {
      const card = reader(box);
      // Title and href come from the same anchor; query it once.
      const anchor = card.find('h3 a');
      const bookTitle = anchor.attr('title');
      const bookPrice = card.find('.price_color').text();
      const stockLevel = card.find('.availability').text().trim();
      const pagePath = anchor.attr('href');

      const infoObject = {
        label: bookTitle,
        cost: bookPrice,
        inStock: stockLevel,
        // URL resolution rather than string concatenation (see scrapeBooks).
        source: pagePath === undefined ? null : new URL(pagePath, targetUrl).href,
      };
      finalResults.push(infoObject);
    });

    console.log(finalResults);
  } catch (err) {
    // Report failures on stderr, matching scrapeBooks().
    console.error(`Something went wrong: ${err}`);
  }
}

collectSiteData();