导包

import json
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

设置Chrome浏览器的选项,以便在后台运行

# Configure Chrome to run headless (in the background, without a visible window).
# The driver path is a raw string so the backslash in the Windows path is never
# interpreted as the start of an escape sequence.
service = Service(r'D:\chromedriver.exe')
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('window-size=1920x1080')
options.add_argument('disable-gpu')

启动Chrome浏览器,并打开豆瓣电影页面

# Launch Chrome with the service and options configured above, then load the
# Douban page for the movie whose reviews are collected.
subject_url = 'https://movie.douban.com/subject/25868125/'
driver = webdriver.Chrome(options=options, service=service)
driver.get(subject_url)

点击“全部影评”按钮,加载所有影评数据

# Keep clicking the "more" button for as long as it is present and visible.
# The button is looked up again on every pass: a click may re-render the page,
# which would make a previously located element stale, and find_elements()
# returns an empty list (instead of raising) when the button does not exist.
while True:
    matches = driver.find_elements(By.CLASS_NAME, 'more-btn')
    button = matches[0] if matches else None
    if button is None or not button.is_displayed():
        break
    button.click()
    time.sleep(2)  # give the page time to load the additional content

获取影评数据

# Collect the short comments as a list of dicts with the keys
# 'user', 'time' and 'content'.
comments = []
# Fetch the first 3 pages: start offsets 0, 20 and 40, with 20 comments per page.
for start in range(0, 60, 20):
    url = f'https://movie.douban.com/subject/25868125/comments?start={start}&limit=20&status=P&sort=new_score'
    driver.get(url)
    for item in driver.find_elements(By.CSS_SELECTOR, '.comment-item'):
        info = item.find_element(By.CLASS_NAME, 'comment-info')
        comments.append({
            # Text of the first link inside the comment's info element.
            'user': info.find_element(By.TAG_NAME, 'a').text,
            # The timestamp is read from the element's "title" attribute.
            'time': item.find_element(By.CLASS_NAME, 'comment-time').get_attribute('title'),
            'content': item.find_element(By.CLASS_NAME, 'short').text,
        })

保存影评数据到文件

# Serialize the collected comments to comments.json as UTF-8, keeping
# non-ASCII characters readable instead of \u-escaping them.
serialized = json.dumps(comments, ensure_ascii=False)
with open('comments.json', 'w', encoding='utf-8') as out_file:
    out_file.write(serialized)

关闭浏览器

# Shut down the browser and end the WebDriver session.
driver.quit()


原文地址: https://www.cveoy.top/t/topic/g644 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录