"""Fetch one Dangdang search-results page and print the listed books as a table.

Running the file as a script performs the HTTP request, extracts one record
per matched result item and prints the records as a pandas DataFrame.
Importing the module only defines the helpers; no network access happens.
"""

import pandas as pd

# Search endpoint; the ``key`` parameter is a percent-encoded, non-UTF-8
# keyword (it appears to be the GBK encoding of "neural network" in Chinese).
url = 'http://search.dangdang.com/?key=%C9%F1%BE%AD%CD%F8%C2%E7&act=input'
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36 Edg/114.0.1823.43'
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def _first(node, path, default=None):
    """Return the first match of XPath ``path`` on ``node``, or ``default``."""
    matches = node.xpath(path)
    return matches[0] if matches else default


def _parse_rating(style):
    """Convert a rating ``style`` attribute value into a score.

    All digits found in ``style`` are joined into one integer and divided by
    20, so a value of 100 yields 5.0.  Presumably the attribute carries a
    width percentage (e.g. ``"width: 90%;"``) -- confirm against the page
    markup.  Returns ``None`` when ``style`` is missing or has no digits.
    """
    if not style:
        return None
    digits = ''.join(filter(str.isdigit, style))
    # Dividing by 20 is the same scale as multiplying by 0.05 but avoids
    # float artefacts such as 3.5000000000000004.
    return int(digits) / 20 if digits else None


def get_book_info(book):
    """Extract the fields of a single search-result item.

    Args:
        book: An object exposing ``xpath(path)`` that returns a list of
            matches (an lxml element when called from this script).

    Returns:
        A dict keyed by the Chinese column names used for the output table:
        title, author, publisher, publish date, price, rating, comment count
        and detail text.  A field that is absent from the item is ``None``
        rather than raising ``IndexError``, so one incomplete listing does
        not abort the whole scrape.
    """
    name = _first(book, ".//p[@class='name']/a/@title")
    author = _first(book, ".//p[@class='search_book_author']/span/a/@title")
    publisher = _first(
        book,
        ".//p[@class='search_book_author']/span[@class='search_book_publish']/a/@title",
    )
    publish_date = _first(
        book,
        ".//p[@class='search_book_author']/span[@class='search_book_publish']/text()",
    )
    price = _first(book, ".//span[@class='search_now_price']/text()")
    rating = _parse_rating(
        _first(book, ".//span[@class='search_star_black']/span/@style")
    )
    comment_num = _first(book, ".//a[@class='search_comment_num']/text()")
    detail = _first(book, ".//p[@class='detail']/text()")

    return {
        '书名': name,
        '作者': author,
        '出版社': publisher,
        '出版日期': publish_date,
        '价格': price,
        '评分': rating,
        '评论数': comment_num,
        '详情': detail,
    }


def _fetch_book_nodes():
    """Download the search page and return the matched result elements.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status.
    """
    # Imported here rather than at module top so that get_book_info can be
    # imported and exercised without the network/parsing stack installed.
    import requests
    from lxml import etree

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # When the server declares no charset, requests falls back to ISO-8859-1
    # for text responses, which would garble Chinese text; use the detected
    # encoding in that case.
    if (response.encoding or '').lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding

    tree = etree.HTML(response.text)
    # NOTE(review): this predicate only matches <li> elements whose ddt-pit is
    # '1' AND whose class is 'line1' -- that looks like it selects just the
    # first result item.  Confirm whether all result items were intended.
    return tree.xpath("//li[@ddt-pit='1' and @class='line1']")


def main():
    """Scrape the search page and print the extracted records as a table."""
    book_info_list = [get_book_info(book) for book in _fetch_book_nodes()]
    df = pd.DataFrame(book_info_list)
    print(df)


if __name__ == '__main__':
    main()


# 原文地址: https://www.cveoy.top/t/topic/pqJ2 著作权归作者所有。请勿转载和采集!

# 免费AI点我,无需注册和登录