from openpyxl import Workbook, load_workbook; from selenium.webdriver.common.by import By; from selenium import webdriver; import re; import time; from lxml import etree; from selenium.webdriver.support.ui import WebDri…
要让代码等待“下一页”按钮(由CSS选择器定位)出现后再点击,可以使用 WebDriverWait 配合 expected_conditions 中的等待条件(例如 presence_of_element_located,用于判断元素是否已出现在 DOM 中)来实现。具体的修改如下:
- 导入WebDriverWait和expected_conditions:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
- 在循环内部使用WebDriverWait来等待下一页的CSS选择器出现:
# Wait (up to 10 seconds) until an element matching the next-page CSS selector is present
next_button = WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.ant-pagination-next > span:nth-child(1)')))
- 点击下一页按钮:
# Click the located next-page button
next_button.click()
完整代码如下:
import re
import time

from lxml import etree
from openpyxl import Workbook, load_workbook
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Scrape paginated comments (text + timestamp) from one page of the site and
# store them in an Excel workbook, one comment per row.

# --- Configuration ----------------------------------------------------------
# Location of the local Firefox executable used by the driver.
FIREFOX_BINARY = r'C:\Users\yuxin\AppData\Local\Mozilla Firefox\firefox.exe'
START_URL = "https://you.ctrip.com/sight/dayi3130/71986.html"
OUTPUT_PATH = "./jc630.xlsx"
MAX_PAGES = 210          # upper bound on the number of pages visited
WAIT_SECONDS = 10        # timeout for every explicit wait
# Absolute XPath matching the per-comment <div> nodes; the same expression is
# used both for the Selenium wait and for the lxml extraction.
COMMENT_XPATH = (
    '/html/body/div[2]/div[2]/div/div[3]/div/div[4]/div[1]/div[4]/div/div[5]/div'
)
NEXT_BUTTON_CSS = '.ant-pagination-next > span:nth-child(1)'


def _first_text(node, xpath):
    """Return the first result of *xpath* evaluated on *node*, or None if empty."""
    matches = node.xpath(xpath)
    return matches[0] if matches else None


# Prepare the workbook first so a failure here cannot leave a browser running.
wb = Workbook()
sheet = wb.active
sheet["A1"] = "内容"
sheet["B1"] = "时间"

# Create the browser object (headless Firefox).
options = webdriver.FirefoxOptions()
options.binary_location = FIREFOX_BINARY
options.add_argument('--headless')
# Spelling corrected from '--diable-gpu'.
# NOTE(review): this looks like a Chrome-style switch; confirm Firefox honours it.
options.add_argument('--disable-gpu')
browser = webdriver.Firefox(options=options)

try:
    # Open the site.
    browser.get(START_URL)

    for page_num in range(1, MAX_PAGES + 1):
        # Wait until at least one comment node is present in the DOM.
        # NOTE(review): after a page switch this wait could be satisfied by the
        # previous page's nodes if the site swaps content in place, in which
        # case a page might be scraped twice; confirm against the live site.
        WebDriverWait(browser, WAIT_SECONDS).until(
            EC.presence_of_element_located((By.XPATH, COMMENT_XPATH))
        )

        tree = etree.HTML(browser.page_source)
        for div in tree.xpath(COMMENT_XPATH):
            content = _first_text(div, './/div[@class="commentDetail"]/text()')
            if content is None:
                # Child without comment text: skip it instead of raising
                # IndexError as an unguarded `[0]` would.
                continue
            contenttime = _first_text(div, './/div[@class="commentTime"]/text()')
            sheet.append([content, "" if contenttime is None else contenttime])

        # Save after every page so progress survives a later failure.
        wb.save(OUTPUT_PATH)

        if page_num == MAX_PAGES:
            # Page limit reached; no further click is needed.
            break

        try:
            # Wait for the next-page button to appear, then click it.
            next_button = WebDriverWait(browser, WAIT_SECONDS).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, NEXT_BUTTON_CSS))
            )
            next_button.click()
        except WebDriverException:
            # Button missing (timeout) or not clickable: treat as the end of the
            # pagination. Only Selenium errors are caught, so Ctrl-C still works.
            break
        time.sleep(0.3)
finally:
    # Always shut the browser down, even when a wait times out.
    browser.quit()
这样就可以在每一页加载完毕之后,等待CSS选择器所匹配的“下一页”按钮出现,再进行点击操作了。
原文地址: http://www.cveoy.top/t/topic/hDJb 著作权归作者所有。请勿转载和采集!