url =httpswwwbookingcnhotelcnchengdu-tuo-ban-xie-qing-nian-lv-shezh-cnhtml#tab-reviews# 创建浏览器对象options = webdriverFirefoxOptionsoptionsbinary_location = rCUsersMozilla Firefoxfirefoxexebrowser = webdr
# Import the xpan library
from xpan import xp
# Collect every review element currently present in the review list.
# find_elements returns a list (empty when nothing matches), so the result
# can be iterated directly.
reviews_list = browser.find_elements(By.CSS_SELECTOR, '.review_list .review_item')
# Walk every review element and extract its individual fields.
for review in reviews_list:
    # Review body text
    content = review.find_element(By.CSS_SELECTOR, '.review_item_review').text
    # Review date
    date = review.find_element(By.CSS_SELECTOR, '.review_item_date').text
    # Reviewer name
    name = review.find_element(By.CSS_SELECTOR, '.review_item_reviewer .bui-avatar__title').text
    # Reviewer nationality
    nationality = review.find_element(By.CSS_SELECTOR, '.review_item_reviewer .bui-avatar__subtitle').text
    # Review score
    score = review.find_element(By.CSS_SELECTOR, '.review-score-badge').text
    # Print the extracted fields, followed by a separator line.
    print('评论内容:', content)
    print('评论时间:', date)
    print('评论人名字:', name)
    print('评论人国籍:', nationality)
    print('评论分数:', score)
    print('-' * 50)
# Check whether a further page of reviews is available.
next_button = browser.find_element(By.CSS_SELECTOR, '.pagenext')
if 'inactive' not in next_button.get_attribute('class'):
    # The button is not marked inactive, so click through to the next page.
    next_button.click()
    # Fixed pause to give the next page time to load.
    time.sleep(10)
    # Extract the review fields from the newly loaded page with xp.
    # NOTE(review): running this inside the `if` is inferred from the order of
    # the original comments; xp's signature and return value are not visible
    # here (the result is discarded, as in the original) - confirm against the
    # xpan library.
    xp(
        "//*[@class='review_list']//div[@itemprop='review']",
        [
            'content:.review_item_review',
            'date:.review_item_date',
            'name:.review_item_reviewer .bui-avatar__title',
            'nationality:.review_item_reviewer .bui-avatar__subtitle',
            'score:.review-score-badge',
        ],
    )
# Original source: https://www.cveoy.top/t/topic/eqwk. Copyright belongs to the author; do not repost or scrape.