Selenium 自动化提取网页内题目编号
导入需要的库
import os
import random
import socket
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
定义获取浏览器实例的函数
# Address of the Chrome remote-debugging endpoint this script attaches to.
_DEBUG_HOST = '127.0.0.1'
_DEBUG_PORT = 9222
# Profile directory handed to Chrome when this script has to launch it.
_PROFILE_DIR = r'D:\评阅用'


def _debugger_listening(host, port, timeout=1.0):
    """Return True if a TCP connection to ``host:port`` is accepted."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False


def get_driver():
    """Return a Chrome WebDriver attached to the remote-debugging browser.

    If nothing is listening on the debugging port yet, Chrome is started
    with remote debugging enabled and the dedicated profile directory, and
    the function waits briefly for the port to come up before attaching.

    Returns:
        A ``webdriver.Chrome`` instance bound to ``127.0.0.1:9222``.
    """
    # Probe the port directly. Instantiating webdriver.Chrome() just to look
    # at its window handles would itself spawn (and leak) a fresh browser and
    # would therefore always report "already open".
    if not _debugger_listening(_DEBUG_HOST, _DEBUG_PORT):
        # NOTE(review): `start` is a Windows shell command, so this launch
        # path presumably only works on Windows - confirm before porting.
        command = 'start chrome --remote-debugging-port={} --user-data-dir="{}"'.format(
            _DEBUG_PORT, _PROFILE_DIR
        )
        os.system(command)

        # Give the freshly started browser a moment to open the port. If it
        # never does, fall through and let Selenium raise its own error.
        deadline = time.monotonic() + 15
        while time.monotonic() < deadline:
            if _debugger_listening(_DEBUG_HOST, _DEBUG_PORT):
                break
            time.sleep(0.5)

    options = Options()
    options.add_experimental_option(
        'debuggerAddress', '{}:{}'.format(_DEBUG_HOST, _DEBUG_PORT)
    )
    return webdriver.Chrome(options=options)
获取浏览器实例
# Obtain the WebDriver instance used by every step below.
driver = get_driver()
找到有'内蒙古开放大学'字样的标签页
# Scan every open tab until one whose title contains the target text is
# found; the driver is left switched to that tab when the loop exits.
while True:
    for handle in driver.window_handles:
        driver.switch_to.window(handle)
        if '内蒙古开放大学' in driver.title:
            print('登录成功')
            break
    else:
        # No tab matched on this pass: wait, then scan the tabs again.
        time.sleep(3)
        continue
    # Reached only when the inner loop broke out on a matching tab.
    break
统计待批改题目数量
# Collect the elements carrying the 'not-marked' class and report how many
# were found.
elements = driver.find_elements(By.CLASS_NAME, 'not-marked')
print('待批改题目数量:', len(elements), '个')
查找class属性为subject-card的元素
# All elements on the current page whose class is 'subject-card'.
subject_cards = driver.find_elements(By.CLASS_NAME, 'subject-card')
遍历所有subject-card元素
for subject_card in subject_cards:
    # Search only inside the current card for 'subject-number' elements.
    subject_numbers = subject_card.find_elements(By.CLASS_NAME, 'subject-number')
    for subject_number in subject_numbers:
        # The element's visible text is printed as the question number.
        number = subject_number.text
        print('题目编号:', number)
关闭浏览器
# Extraction is finished: shut down the WebDriver session.
driver.quit()
原文地址: https://www.cveoy.top/t/topic/fZYK 著作权归作者所有。请勿转载和采集!