Python 爬取携程景点评论并保存到 Excel 文件
from openpyxl import Workbook
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Scrape the comments of one Ctrip sight page, page by page, into an Excel file.
#
# Each comment is written as one row (content, time) under a header row. The
# workbook is re-saved after every page so an interrupted run keeps the pages
# collected so far.

# Page to scrape and where to store the result.
START_URL = 'https://you.ctrip.com/sight/dayi3130/71986.html#ctm_ref=www_hp_his_lst'
OUTPUT_PATH = './jc6301.xlsx'
# Upper bound on the number of pages visited.
MAX_PAGES = 215
# Seconds to wait for the "next page" button to become clickable.
WAIT_SECONDS = 10

# Locators. The absolute XPath selects the direct <div> children of the comment
# list container; it is tied to the current page layout.
NEXT_BUTTON = (By.CSS_SELECTOR, '.ant-pagination-next > span:nth-child(1)')
COMMENT_ITEMS = (By.XPATH, '/html/body/div[2]/div[2]/div/div[3]/div/div[4]/div[1]/div[4]/div/div[5]/div')
# Selenium can only locate elements, not text nodes, so these XPaths must stop
# at the <div> (no trailing "/text()"); the text is read via textContent.
COMMENT_DETAIL = (By.XPATH, './/div[@class="commentDetail"]')
COMMENT_TIME = (By.XPATH, './/div[@class="commentTime"]')

# Create the browser object, pointing Selenium at the local Firefox binary.
options = webdriver.FirefoxOptions()
options.binary_location = r'C:\Users\yuxin\AppData\Local\Mozilla Firefox\firefox.exe'
# To run without a visible window, add: options.add_argument('--headless')
browser = webdriver.Firefox(options=options)

# Workbook with a header row: comment content, comment time.
wb = Workbook()
sheet = wb.active
sheet['A1'] = '内容'
sheet['B1'] = '时间'

page_num = 1
try:
    # Open the site.
    browser.get(START_URL)

    while True:
        # Wait for the "next page" button before reading the page. If it never
        # becomes clickable, treat the current page as the last one: it is
        # still scraped below, and then the loop stops.
        try:
            next_button = WebDriverWait(browser, WAIT_SECONDS).until(
                EC.element_to_be_clickable(NEXT_BUTTON)
            )
        except TimeoutException:
            next_button = None

        # Collect every comment on the current page.
        for comment in browser.find_elements(*COMMENT_ITEMS):
            detail_nodes = comment.find_elements(*COMMENT_DETAIL)
            time_nodes = comment.find_elements(*COMMENT_TIME)
            if not detail_nodes or not time_nodes:
                # Not a comment entry (the container may hold other children).
                continue
            content = detail_nodes[0].get_attribute('textContent')
            contenttime = time_nodes[0].get_attribute('textContent')
            sheet.append([content, contenttime])

        # Persist what has been collected so far.
        wb.save(OUTPUT_PATH)

        if next_button is None or page_num >= MAX_PAGES:
            break

        # Go to the next page.
        next_button.click()
        page_num += 1
finally:
    # Always close the browser, even when scraping fails part-way.
    browser.quit()
修正的地方包括:
- 获取评论内容和时间的方式由 get_attribute("xpath") 改为 get_attribute("textContent")。
- 点击下一页按钮的方式由 execute_script("arguments[0].click();", next_button) 改为 next_button.click()。
原文地址: https://www.cveoy.top/t/topic/o6Dc 著作权归作者所有。请勿转载和采集!