以下是一个使用Python编写爬虫爬取当当网搜索页面信息的示例代码:

import requests
from lxml import etree
import pandas as pd

def _field_text(node, path):
    """Return the text matched by *path* relative to *node*, or "" if absent.

    Every matched text node / attribute value is whitespace-stripped, empty
    fragments are dropped, and the rest are joined with a single space. A
    missing field therefore yields an empty string rather than being skipped,
    which keeps every book's row the same width.
    """
    fragments = (str(part).strip() for part in node.xpath(path))
    return " ".join(fragment for fragment in fragments if fragment)


def scrape_dangdang(keyword):
    """Scrape page 1 of Dangdang search results for *keyword*.

    The extracted records are printed, written to
    ``当当搜索_{keyword}_单页面.csv`` in the current directory, and returned.

    Args:
        keyword: Search term; it is URL-encoded by ``requests``.

    Returns:
        pandas.DataFrame with one row per ``<li>`` under ``ul.bigimg`` and
        one column per extracted field (empty string where a field is
        missing). The frame is empty (headers only) when nothing matched.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    # Column name -> XPath relative to a single result <li>. Extracting per
    # item (instead of one page-wide list per field) keeps fields aligned when
    # a book lacks e.g. a publisher or a rating; page-wide lists of different
    # lengths make pd.DataFrame raise ValueError.
    field_paths = [
        ("书名", './/a[@class="pic"]/img/@alt'),
        # NOTE(review): this selector is identical to the one used for "简介"
        # below, so both columns carry the same text. It looks like a
        # copy-paste slip; the promotion text presumably lives in a different
        # element -- confirm against the live page markup before changing.
        ("促销语", './/p[@class="detail"]/text()'),
        ("作者", './/p[@class="search_book_author"]/span[1]/a/text()'),
        ("出版时间", './/p[@class="search_book_author"]/span[2]/text()'),
        ("出版社", './/p[@class="search_book_author"]/span[3]/a/text()'),
        ("价格", './/p[@class="price"]/span[@class="search_now_price"]/text()'),
        ("星级", './/div[@class="star"]/a/text()'),
        ("评论数", './/div[@class="star"]/span/a/text()'),
        ("简介", './/p[@class="detail"]/text()'),
    ]
    columns = [name for name, _ in field_paths]

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"
    }
    # Pass the query via params so special characters in the keyword are
    # escaped, and bound the wait so a stalled connection cannot hang forever.
    response = requests.get(
        "http://search.dangdang.com/",
        params={"key": keyword, "act": "input"},
        headers=headers,
        timeout=10,
    )
    response.raise_for_status()
    # Decode using the charset detected from the body; the header-derived
    # default can be wrong for non-UTF-8 pages and would garble Chinese text.
    response.encoding = response.apparent_encoding

    tree = etree.HTML(response.text)
    # Guard against a None tree (nothing parseable in the response body).
    items = tree.xpath('//ul[@class="bigimg"]/li') if tree is not None else []

    rows = [
        {name: _field_text(item, path) for name, path in field_paths}
        for item in items
    ]

    # Passing columns explicitly keeps the header row even with zero results.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(f"当当搜索_{keyword}_单页面.csv", index=False)
    print(df)
    return df

if __name__ == "__main__":
    # Demo run: scrape the first results page for a sample search term.
    search_term = "神经网络"
    scrape_dangdang(search_term)

请确保已安装 requests、lxml 和 pandas 库。运行代码后,将会输出提取的信息并保存为 CSV 文件。

在当当网搜索页面按照关键词(比如"神经网络")进行搜索,使用 Python 编写爬虫,利用 XPath 自动爬取搜索结果中图书的书名、促销语、出版信息(包括作者、出版时间、出版社)、价格、星级、评论数和简介等信息。实现第 1 页内容的获取和信息提取,利用 pandas 库将提取的信息打印出来(如图 4-1 所示),并保存为"当当搜索_关键词_单页面.csv"文件,其中"关键词"用具体的关键词取代,比如"当当搜索_神经网络_单页面.csv"。

原文地址: https://www.cveoy.top/t/topic/hHvy 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录