以下是使用 Selenium 抓取豆瓣电影影评（前 3 页）的 Python 代码实现：

# 导入所需库
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json

# Scrape the first three pages of reviews for one Douban movie with headless
# Chrome and store them in reviews.json as a list of
# {"name", "time", "comment"} objects.

# XPath of the container that holds the reviews on a review-list page.
REVIEW_LIST_XPATH = "//div[@class='review-list']"
# XPath of each individual review card inside that container.
REVIEW_ITEM_XPATH = "//div[@class='review-list']//div[@class='main review-item']"
# Maximum number of seconds to wait for the review list to appear.
PAGE_LOAD_TIMEOUT = 10
# Fixed pause (seconds) after clicking a pagination link, kept from the
# original script so consecutive page requests are spaced out.
PAGE_DELAY = 5


def wait_for_review_list(driver, timeout=PAGE_LOAD_TIMEOUT):
    """Block until the review list container is present in the DOM.

    Args:
        driver: The Selenium WebDriver showing the page.
        timeout: Maximum number of seconds to wait.

    Returns:
        The located review list element.

    Raises:
        selenium.common.exceptions.TimeoutException: If the container does
            not appear within ``timeout`` seconds.
    """
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, REVIEW_LIST_XPATH))
    )


def scrape_current_page(driver):
    """Extract every review on the currently loaded page.

    Args:
        driver: The Selenium WebDriver showing a review-list page.

    Returns:
        A list of dicts with the keys ``name``, ``time`` and ``comment``,
        one per review card found (empty if the page has no review cards).
    """
    return [
        {
            'name': item.find_element(By.XPATH, ".//header//a").text,
            'time': item.find_element(By.XPATH, ".//header//span").text,
            'comment': item.find_element(By.XPATH, ".//div[@class='short-content']").text,
        }
        for item in driver.find_elements(By.XPATH, REVIEW_ITEM_XPATH)
    ]


# Run Chrome in headless mode (no visible browser window).
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=options)

# try/finally guarantees the browser process is closed even when a locator is
# missing or a wait times out; the error itself still propagates so a failed
# run is visible instead of being silently swallowed.
try:
    # Open the Douban movie page.
    url = 'https://movie.douban.com/subject/25868125/'
    driver.get(url)

    # Follow the link to the full review list.
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.3.
    driver.find_element(
        By.XPATH, "//div[@id='interest_sectl']//a[@class='more-link']"
    ).click()

    # Page 1: wait for the list to load, then scrape it.
    wait_for_review_list(driver)
    reviews = scrape_current_page(driver)

    # Pages 2-3: click the matching pagination link, pause, make sure the
    # list is present, then scrape.
    for page in range(2, 4):
        driver.find_element(
            By.XPATH,
            "//div[@class='paginator']//a[@data-page='" + str(page) + "']",
        ).click()
        time.sleep(PAGE_DELAY)
        wait_for_review_list(driver)
        reviews.extend(scrape_current_page(driver))

    # Store the collected reviews as JSON; ensure_ascii=False keeps the
    # Chinese text readable in the output file.
    with open('reviews.json', 'w', encoding='utf-8') as f:
        json.dump(reviews, f, ensure_ascii=False)
finally:
    # Always close the browser.
    driver.quit()

运行完毕后，会在当前路径下生成一个名为“reviews.json”的文件，其中包含前 3 页影评的数据（每条影评包含 name、time、comment 三个字段）。


原文地址: https://www.cveoy.top/t/topic/g69D 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录