其中的CSV文件应该改为JSON文件,修改后的代码如下:

import json

import scrapy

class JDSpider(scrapy.Spider):
    """Crawl jd.com: home page -> category pages -> product pages.

    Each product yields one JSON object (name, price, link) appended as a
    line to ``products.json`` (JSON Lines format).
    """

    name = 'jd'
    allowed_domains = ['jd.com']
    start_urls = ['https://www.jd.com/']

    def parse(self, response):
        # Follow every category link found in the home-page menu.
        links = response.css('.cate_menu_item a::attr(href)').extract()
        for link in links:
            yield scrapy.Request(link, callback=self.parse_category)

    def parse_category(self, response):
        # Follow every product link inside this category's listing.
        links = response.css('.gl-item .p-name a::attr(href)').extract()
        for link in links:
            yield scrapy.Request(link, callback=self.parse_product)

    def parse_product(self, response):
        # extract_first() returns None when the selector matches nothing;
        # guard with `or ''` so .strip() cannot raise AttributeError
        # (the original crashed on pages missing either node).
        name = (response.css('.sku-name::text').extract_first() or '').strip()
        price = (response.css('.price .price::text').extract_first() or '').strip()
        link = response.url

        # Append one JSON object per line. Explicit utf-8 plus
        # ensure_ascii=False keeps Chinese product names readable in the
        # output file. (Original also had a syntax error: the closing
        # parenthesis of f.write('\n') was missing.)
        with open('products.json', 'a', encoding='utf-8') as f:
            json.dump({'name': name, 'price': price, 'link': link}, f,
                      ensure_ascii=False)
            f.write('\n')

原文地址: https://www.cveoy.top/t/topic/fp1m 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录