# Selenium + lxml script: search the bige3.cc novel site for a keyword and print every chapter of each matching novel.
# Import the standard-library helpers plus the selenium and lxml libraries
import random
import sys
import time

from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Search keyword for the novel
keyword = "开挂闯异界"

# Locators used by the script. The absolute XPaths are copied from the page
# layout the script was written against and will break if that layout changes.
SEARCH_INPUT_XPATH = "/html/body/div[4]/div[1]/div[2]/form/input[1]"
RESULT_LINK_XPATH = "/html/body/div[5]/div/div/div/div/div[2]/h4/a"
CHAPTER_LINK_XPATH = "//*[@class='listmain']/dl/dd/a"
CHAPTER_CONTENT_XPATH = "//*[@id='chaptercontent']"

# Fixed pause (seconds) after each navigation so the page can finish loading
PAGE_LOAD_WAIT = 3


def _print_chapter(driver, title, url):
    """Open one chapter URL and print its title followed by its plain text."""
    driver.get(url)
    time.sleep(PAGE_LOAD_WAIT)
    container = driver.find_element(By.XPATH, CHAPTER_CONTENT_XPATH)
    html = container.get_attribute("innerHTML") or ""
    # Only hand non-blank markup to lxml; an empty container yields empty text
    tree = etree.HTML(html) if html.strip() else None
    content = tree.xpath("string(.)") if tree is not None else ""
    print(title)
    print(content)


def _scrape_novel(driver):
    """Print every chapter linked from the novel index page in the current window."""
    links = driver.find_elements(By.XPATH, CHAPTER_LINK_XPATH)
    # Snapshot titles and URLs BEFORE navigating: once the driver leaves the
    # index page the WebElement references become stale and can't be read.
    chapters = [
        (link.text.replace('、', ''), link.get_attribute("href"))
        for link in links
    ]
    for title, url in chapters:
        # Skip anchors without a navigable address (e.g. script-only links)
        if not url or not url.startswith("http"):
            continue
        try:
            _print_chapter(driver, title, url)
        except Exception as exc:
            # Best effort: report the failure and carry on with the next chapter
            print(f"Failed to fetch chapter {title!r}: {exc}", file=sys.stderr)


def _open_result(driver, index, results_window):
    """Click the search result at *index* and focus the page it opens.

    Returns False when the result list no longer has that many entries.
    """
    # Re-locate the links on every call; references from an earlier pass may be stale
    links = driver.find_elements(By.XPATH, RESULT_LINK_XPATH)
    if index >= len(links):
        return False
    link = links[index]
    print(link.text)
    # Random delay before clicking
    time.sleep(random.uniform(3, 5))
    link.click()
    # Focus the newly opened window, if the click opened one
    opened = [h for h in driver.window_handles if h != results_window]
    if opened:
        driver.switch_to.window(opened[-1])
    time.sleep(PAGE_LOAD_WAIT)
    return True


def _return_to_results(driver, results_window, results_url):
    """Close any extra windows and make the search-results page current again."""
    for handle in driver.window_handles:
        if handle != results_window:
            driver.switch_to.window(handle)
            driver.close()
    driver.switch_to.window(results_window)
    # If the novel opened in the same tab, the results page has been navigated
    # away from; reload it. NOTE(review): assumes the results URL is reloadable.
    if driver.current_url != results_url:
        driver.get(results_url)
        time.sleep(PAGE_LOAD_WAIT)


def main():
    """Search the site for ``keyword`` and print all chapters of every hit."""
    # Create the browser object
    driver = webdriver.Chrome()
    try:
        # Open the novel site's home page
        driver.get("https://www.bige3.cc/")
        # Type the keyword and press Enter to run the search
        search_input = driver.find_element(By.XPATH, SEARCH_INPUT_XPATH)
        search_input.send_keys(keyword)
        search_input.send_keys(Keys.ENTER)
        time.sleep(PAGE_LOAD_WAIT)

        results_window = driver.current_window_handle
        results_url = driver.current_url
        result_count = len(driver.find_elements(By.XPATH, RESULT_LINK_XPATH))

        # Open each search result in turn and dump its chapters
        for index in range(result_count):
            try:
                if _open_result(driver, index, results_window):
                    _scrape_novel(driver)
            except Exception as exc:
                # Best effort: report the failure and move on to the next result
                print(f"Failed to process result #{index}: {exc}", file=sys.stderr)
            finally:
                _return_to_results(driver, results_window, results_url)
    finally:
        # Always shut the browser down, even when something above failed
        driver.quit()


if __name__ == "__main__":
    main()
# Original source: https://www.cveoy.top/t/topic/iQ2j — copyright belongs to the author. Please do not repost or scrape.