# 小说下载器 - 快速下载您的最爱小说 (Novel downloader - quickly download your favorite novels)
import os
import threading
import tkinter as tk
from tkinter import ttk, messagebox

import requests
from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class NovelDownloader:
    """Tkinter GUI app that searches two web sources for a novel and
    downloads its chapters, optionally saving them to disk."""

    def __init__(self, root):
        """Build the main window, styles, and widgets.

        The original code named this method ``init``, so it was never
        invoked as a constructor; renamed to ``__init__``.
        """
        self.root = root
        self.root.title('小说下载器')
        self.root.geometry('800x600')
        self.setup_styles()
        self.create_widgets()
        # Maps chapter title -> chapter text; filled by the crawlers and
        # mirrored into the listbox by update_display().
        self.chapters = {}
def setup_styles(self):
style = ttk.Style()
style.configure('TFrame', background='white')
style.configure('TLabel', background='white', font=('Arial', 13), foreground='navy')
style.configure('TButton', background='lightblue', font=('Arial', 13), foreground='green')
style.configure('TCheckbutton', background='white', font=('Arial', 13))
style.configure('TListbox', background='white', font=('Arial', 12))
style.configure('TText', background='white', font=('Arial', 13))
def create_widgets(self):
ttk.Label(self.root, text='请输入小说名称:', font=('Arial', 16)).pack(pady=10)
self.entry = ttk.Entry(self.root, font=('Arial', 14))
self.entry.pack(pady=5)
self.save_checkbox_var = tk.BooleanVar()
ttk.Checkbutton(self.root, text='是否下载小说内容到当前目录下', variable=self.save_checkbox_var, style='TCheckbutton').pack(pady=5)
ttk.Button(self.root, text='获取(书源1<推荐>)', command=self.search_novel_source1, style='TButton').pack(pady=10)
ttk.Button(self.root, text='获取(书源2)', command=self.search_novel_source2, style='TButton').pack(pady=10)
self.create_listbox_frame()
self.create_chapter_text_frame()
def create_listbox_frame(self):
listbox_frame = ttk.Frame(self.root)
listbox_frame.pack(side=tk.LEFT, fill=tk.Y, padx=10)
scrollbar = ttk.Scrollbar(listbox_frame)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
self.listbox = tk.Listbox(listbox_frame, selectmode=tk.SINGLE, yscrollcommand=scrollbar.set, font=('Arial', 12))
self.listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar.config(command=self.listbox.yview)
self.listbox.bind('<<ListboxSelect>>', self.show_chapter_content)
def create_chapter_text_frame(self):
chapter_text_frame = ttk.Frame(self.root)
chapter_text_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=10)
chapter_text_scrollbar = ttk.Scrollbar(chapter_text_frame)
chapter_text_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
self.chapter_text = tk.Text(chapter_text_frame, yscrollcommand=chapter_text_scrollbar.set, font=('Arial', 12))
self.chapter_text.config(spacing1=10, spacing2=20)
self.chapter_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
chapter_text_scrollbar.config(command=self.chapter_text.yview)
def search_novel_source1(self):
self.search_novel(source=1)
def search_novel_source2(self):
self.search_novel(source=2)
def update_display(self):
self.listbox.delete(0, tk.END)
self.listbox.insert(tk.END, *self.chapters.keys())
def show_chapter_content(self, event):
selected_title = self.listbox.get(self.listbox.curselection())
self.chapter_text.delete('1.0', tk.END)
self.chapter_text.insert(tk.END, self.chapters[selected_title])
def crawl_novel_source1(self, keyword):
options = Options()
options.headless = True
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--window-size=1920,1080')
options.add_argument("user-agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'" )
driver = webdriver.Chrome(options=options)
try:
driver.get('https://www.bige3.cc/')
search_input = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/form/input[1]')
search_input.send_keys(keyword)
search_input.send_keys(Keys.ENTER)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[5]/div/div/div/div/div[2]/h4/a')))
elements = driver.find_elements(By.XPATH, '/html/body/div[5]/div/div/div/div/div[2]/h4/a')
if not elements:
messagebox.showinfo('提示', '没有找到相关小说')
return
for element in elements:
element.click()
driver.switch_to.window(driver.window_handles[-1])
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@class='listmain']/dl/dd/a')))
chapter = driver.find_elements(By.XPATH, '//*[@class='listmain']/dl/dd/a')[0]
chapter.click()
while True:
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id='read']/div[5]/div[3]/h1')))
chapter_title = driver.find_element(By.XPATH, '//*[@id='read']/div[5]/div[3]/h1')
chapter_content = driver.find_element(By.XPATH, '//*[@id='chaptercontent']')
html = chapter_content.get_attribute('innerHTML')
tree = etree.HTML(html)
content = tree.xpath('string(.)')
chapter_title_text = chapter_title.text.replace('、', '')
content = content.replace('无弹窗,更新快,免费阅读!', '')
content = content.replace('请收藏本站:https://www.bige3.cc。笔趣阁手机版:https://m.bige3.cc', '')
content = content.replace('『点此报错』『加入书签』', '')
if self.save_checkbox_var.get():
book_dir = os.path.join(os.getcwd(), keyword)
if not os.path.exists(book_dir):
os.makedirs(book_dir)
chapter_path = os.path.join(book_dir, f'{chapter_title_text}.txt')
with open(chapter_path, 'w', encoding='utf-8') as f:
f.write(content)
self.chapters[chapter_title_text] = content
self.update_display()
try:
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@class='Readpage pagedown']/a[@id='pb_next']')))
next_button = driver.find_element(By.XPATH, '//div[@class='Readpage pagedown']/a[@id='pb_next']')
next_button.click()
except:
break
driver.back()
except Exception as e:
messagebox.showinfo('提示', '运行完毕')
finally:
driver.close()
driver.quit()
if self.save_checkbox_var.get():
messagebox.showinfo('提示', '小说下载完成')
def crawl_novel_source2(self, keyword):
# 书源2的代码
novel_name = keyword
# Construct the search URL
search_url = f'https://www.biquge66.net/search/?searchkey={novel_name}'
# Send an HTTP GET request to the search page
search_response = requests.get(search_url)
# Check if the request was successful
if search_response.status_code == 200:
# Parse the search page content
search_soup = BeautifulSoup(search_response.content, 'html.parser')
# Find the first search result and extract its href
first_result = search_soup.find('div', class_='image').find('a')
if first_result:
novel_href = first_result['href']
# Construct the URL for the novel's main page
novel_url = f'https://www.biquge66.net{novel_href}'
# Send an HTTP GET request to the novel's main page
response = requests.get(novel_url)
# Check if the request was successful
if response.status_code == 200:
# Parse the novel's main page content
soup = BeautifulSoup(response.content, 'html.parser')
# Find all the chapter links
chapter_lists = soup.find_all('div', class_='flex flex-between book-info-main')
# Loop through each chapter link and extract chapter content
for chapter_list in chapter_lists:
for chapter in chapter_list.find_all('a', rel='chapter'):
chapter_title = chapter.text # Get chapter title
chapter_url = 'https://www.biquge66.net' + chapter['href'] # Build complete chapter URL
# Send an HTTP GET request to get the first page of the chapter content
chapter_response = requests.get(chapter_url)
chapter_soup = BeautifulSoup(chapter_response.content, 'html.parser')
# Find the first page of chapter content
chapter_content = chapter_soup.find('div', id='booktxt')
# Extract and print the first page of chapter content
if chapter_content:
chapter_text = chapter_content.text.replace('本站最新网址:www.biquge66.net', '') # Remove site info
# Check if there is a second page
chapter_url2 = chapter_url.replace('.html', '_2.html')
chapter_response2 = requests.get(chapter_url2)
chapter_soup2 = BeautifulSoup(chapter_response2.content, 'html.parser')
chapter_content2 = chapter_soup2.find('div', id='booktxt')
# If there is a second page, concatenate it with the first page
if chapter_content2:
chapter_text2 = chapter_content2.text.replace('本站最新网址:www.biquge66.net', '') # Remove site info
chapter_text += chapter_text2
if self.save_checkbox_var.get():
book_dir = os.path.join(os.getcwd(), keyword)
if not os.path.exists(book_dir):
os.makedirs(book_dir)
chapter_path = os.path.join(book_dir, f'{chapter_title}.txt')
with open(chapter_path, 'w', encoding='utf-8') as f:
f.write(chapter_text)
# 更新self.chapters字典
self.chapters[chapter_title] = chapter_text
# 更新显示
self.update_display()
else:
messagebox.showinfo('提示','无法获取章节内容: {chapter_title}
') else: messagebox.showerror('错误','无法检索小说页面: {novel_url}') else: messagebox.showwarning('警告','未找到给定小说名称的搜索结果') else: messagebox.showerror('错误','无法检索搜索结果页: {search_url}')
def search_novel(self, source):
keyword = self.entry.get()
if not keyword:
messagebox.showwarning('警告', '请输入小说名称')
return
threading.Thread(target=self.crawl_novel, args=(keyword, source)).start()
def crawl_novel(self, keyword, source):
if source == 1:
self.crawl_novel_source1(keyword)
elif source == 2:
self.crawl_novel_source2(keyword)
# 使用多线程加速爬取
if source == 1:
threads = []
for i in range(4):
thread = threading.Thread(target=self.crawl_novel_source1_thread, args=(keyword, i))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
elif source == 2:
threads = []
for i in range(4):
thread = threading.Thread(target=self.crawl_novel_source2_thread, args=(keyword, i))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
def crawl_novel_source1_thread(self, keyword, thread_id):
# ... (crawl_novel_source1代码)
def crawl_novel_source2_thread(self, keyword, thread_id):
# ... (crawl_novel_source2代码)
# Script entry point. The original tested ``name == 'main'`` (undefined
# name / wrong literal); the correct dunder guard is used here.
if __name__ == '__main__':
    root = tk.Tk()
    app = NovelDownloader(root)
    root.mainloop()
# 原文地址: https://www.cveoy.top/t/topic/qCbc 著作权归作者所有。请勿转载和采集!