import os
import threading
import tkinter as tk
from tkinter import ttk, messagebox

import requests
from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

class NovelDownloader:
    """Tkinter GUI application that searches two web book sources for a
    novel and displays/downloads its chapters."""

    def __init__(self, root):
        """Build the main window and widget tree.

        Args:
            root: the top-level tk.Tk window this app attaches to.
        """
        self.root = root
        self.root.title('小说下载器')
        self.root.geometry('800x600')

        self.setup_styles()
        self.create_widgets()
        # Maps chapter title -> cleaned chapter text; filled by the crawlers.
        self.chapters = {}

def setup_styles(self):
    """Register the shared ttk widget styles used across the UI."""
    style = ttk.Style()
    base_font = ('Arial', 13)
    style.configure('TFrame', background='white')
    style.configure('TLabel', background='white', font=base_font, foreground='navy')
    style.configure('TButton', background='lightblue', font=base_font, foreground='green')
    style.configure('TCheckbutton', background='white', font=base_font)
    style.configure('TListbox', background='white', font=('Arial', 12))
    style.configure('TText', background='white', font=base_font)

def create_widgets(self):
    """Lay out the search controls, then the chapter list and reader panes."""
    prompt = ttk.Label(self.root, text='请输入小说名称:', font=('Arial', 16))
    prompt.pack(pady=10)

    self.entry = ttk.Entry(self.root, font=('Arial', 14))
    self.entry.pack(pady=5)

    # Checkbox state is read later by the crawlers to decide whether to
    # also write each chapter to disk.
    self.save_checkbox_var = tk.BooleanVar()
    save_box = ttk.Checkbutton(
        self.root,
        text='是否下载小说内容到当前目录下',
        variable=self.save_checkbox_var,
        style='TCheckbutton',
    )
    save_box.pack(pady=5)

    # One button per book source.
    for label, handler in (
        ('获取(书源1<推荐>)', self.search_novel_source1),
        ('获取(书源2)', self.search_novel_source2),
    ):
        ttk.Button(self.root, text=label, command=handler, style='TButton').pack(pady=10)

    self.create_listbox_frame()
    self.create_chapter_text_frame()

def create_listbox_frame(self):
    """Build the left-hand chapter list pane with a vertical scrollbar."""
    frame = ttk.Frame(self.root)
    frame.pack(side=tk.LEFT, fill=tk.Y, padx=10)

    bar = ttk.Scrollbar(frame)
    bar.pack(side=tk.RIGHT, fill=tk.Y)

    self.listbox = tk.Listbox(
        frame,
        selectmode=tk.SINGLE,
        yscrollcommand=bar.set,
        font=('Arial', 12),
    )
    self.listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # Two-way wiring: the scrollbar drives the list; the list updates the bar.
    bar.config(command=self.listbox.yview)
    self.listbox.bind('<<ListboxSelect>>', self.show_chapter_content)

def create_chapter_text_frame(self):
    """Build the right-hand chapter reading pane with a vertical scrollbar."""
    frame = ttk.Frame(self.root)
    frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=10)

    bar = ttk.Scrollbar(frame)
    bar.pack(side=tk.RIGHT, fill=tk.Y)

    self.chapter_text = tk.Text(
        frame,
        yscrollcommand=bar.set,
        font=('Arial', 12),
    )
    # Extra line spacing for readability of long chapters.
    self.chapter_text.config(spacing1=10, spacing2=20)
    self.chapter_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    bar.config(command=self.chapter_text.yview)

def search_novel_source1(self):
    """Button callback: run a search against book source 1 (recommended)."""
    self.search_novel(source=1)

def search_novel_source2(self):
    """Button callback: run a search against book source 2."""
    self.search_novel(source=2)

def update_display(self):
    """Rebuild the chapter listbox from self.chapters (insertion order).

    NOTE(review): this is called from crawler worker threads, and Tk widgets
    are not thread-safe — consider marshalling through root.after(). Behavior
    is kept exactly as the original here.
    """
    self.listbox.delete(0, tk.END)
    for title in self.chapters:
        self.listbox.insert(tk.END, title)

def show_chapter_content(self, event):
    """Listbox selection callback: render the selected chapter's text.

    Args:
        event: the <<ListboxSelect>> event (only used as a trigger).
    """
    selection = self.listbox.curselection()
    if not selection:
        # <<ListboxSelect>> also fires when the selection is cleared; the
        # original passed the empty tuple to get() and raised a TclError.
        return
    selected_title = self.listbox.get(selection[0])
    self.chapter_text.delete('1.0', tk.END)
    self.chapter_text.insert(tk.END, self.chapters[selected_title])

def crawl_novel_source1(self, keyword):
    """Crawl book source 1 (bige3.cc) with Selenium.

    Searches for *keyword*, opens each search result, then walks every
    chapter via the "next page" link, cleans out site boilerplate and stores
    the text in self.chapters (optionally also writing one .txt per chapter).

    Args:
        keyword: novel title to search for.
    """
    options = Options()
    # `options.headless = True` is deprecated/removed in current Selenium;
    # the argument form is the supported equivalent.
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--window-size=1920,1080')
    options.add_argument("user-agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'" )

    driver = webdriver.Chrome(options=options)

    # XPath literals hoisted; the original nested single quotes inside
    # single-quoted strings, which is a syntax error.
    result_xpath = '/html/body/div[5]/div/div/div/div/div[2]/h4/a'
    chapter_link_xpath = "//*[@class='listmain']/dl/dd/a"
    title_xpath = "//*[@id='read']/div[5]/div[3]/h1"
    content_xpath = "//*[@id='chaptercontent']"
    next_xpath = "//div[@class='Readpage pagedown']/a[@id='pb_next']"

    try:
        driver.get('https://www.bige3.cc/')
        search_input = driver.find_element(By.XPATH, '/html/body/div[4]/div[1]/div[2]/form/input[1]')
        search_input.send_keys(keyword)
        search_input.send_keys(Keys.ENTER)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, result_xpath)))

        elements = driver.find_elements(By.XPATH, result_xpath)
        if not elements:
            messagebox.showinfo('提示', '没有找到相关小说')
            return

        for element in elements:
            element.click()
            driver.switch_to.window(driver.window_handles[-1])
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, chapter_link_xpath)))

            # Start from the first chapter and follow "next" links.
            chapter = driver.find_elements(By.XPATH, chapter_link_xpath)[0]
            chapter.click()
            while True:
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, title_xpath)))
                chapter_title = driver.find_element(By.XPATH, title_xpath)
                chapter_content = driver.find_element(By.XPATH, content_xpath)
                html = chapter_content.get_attribute('innerHTML')
                tree = etree.HTML(html)
                content = tree.xpath('string(.)')
                chapter_title_text = chapter_title.text.replace('、', '')
                # Strip site boilerplate injected into the chapter body.
                content = content.replace('无弹窗,更新快,免费阅读!', '')
                content = content.replace('请收藏本站:https://www.bige3.cc。笔趣阁手机版:https://m.bige3.cc', '')
                content = content.replace('『点此报错』『加入书签』', '')

                if self.save_checkbox_var.get():
                    book_dir = os.path.join(os.getcwd(), keyword)
                    os.makedirs(book_dir, exist_ok=True)
                    chapter_path = os.path.join(book_dir, f'{chapter_title_text}.txt')
                    with open(chapter_path, 'w', encoding='utf-8') as f:
                        f.write(content)

                self.chapters[chapter_title_text] = content
                self.update_display()

                try:
                    WebDriverWait(driver, 10).until(
                        EC.element_to_be_clickable((By.XPATH, next_xpath)))
                    next_button = driver.find_element(By.XPATH, next_xpath)
                    next_button.click()
                except Exception:
                    # No clickable "next" link -> last page of this book.
                    break

            driver.back()

    except Exception:
        # Original behavior: any failure (including the normal end of
        # navigation) is reported as "finished".
        messagebox.showinfo('提示', '运行完毕')
    finally:
        # quit() already closes every window; a separate close() beforehand
        # was redundant and could raise on a dead session.
        driver.quit()

        if self.save_checkbox_var.get():
            messagebox.showinfo('提示', '小说下载完成')

def crawl_novel_source2(self, keyword):
    """Crawl book source 2 (biquge66.net) with requests + BeautifulSoup.

    Searches for *keyword*, opens the first search result's book page,
    downloads every chapter (including an optional second page per chapter),
    cleans the text and stores it in self.chapters (optionally on disk).

    Args:
        keyword: novel title to search for.
    """
    # NOTE(review): requests.get() is used without a timeout, matching the
    # original; a hung connection blocks this worker thread indefinitely.
    search_url = f'https://www.biquge66.net/search/?searchkey={keyword}'
    search_response = requests.get(search_url)
    if search_response.status_code != 200:
        # The original message strings lacked the f-prefix, so the literal
        # '{search_url}' placeholder was shown; fixed here and below.
        messagebox.showerror('错误', f'无法检索搜索结果页: {search_url}')
        return

    search_soup = BeautifulSoup(search_response.content, 'html.parser')
    # Guard: the original chained .find('a') onto a possibly-None div and
    # raised AttributeError when there were no results.
    image_div = search_soup.find('div', class_='image')
    first_result = image_div.find('a') if image_div else None
    if not first_result:
        messagebox.showwarning('警告', '未找到给定小说名称的搜索结果')
        return

    novel_url = f"https://www.biquge66.net{first_result['href']}"
    response = requests.get(novel_url)
    if response.status_code != 200:
        messagebox.showerror('错误', f'无法检索小说页面: {novel_url}')
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    chapter_lists = soup.find_all('div', class_='flex flex-between book-info-main')

    for chapter_list in chapter_lists:
        for chapter in chapter_list.find_all('a', rel='chapter'):
            chapter_title = chapter.text
            chapter_url = 'https://www.biquge66.net' + chapter['href']

            chapter_response = requests.get(chapter_url)
            chapter_soup = BeautifulSoup(chapter_response.content, 'html.parser')
            chapter_content = chapter_soup.find('div', id='booktxt')

            if not chapter_content:
                messagebox.showinfo('提示', f'无法获取章节内容: {chapter_title}')
                continue

            # Remove the site-watermark line from the chapter body.
            chapter_text = chapter_content.text.replace('本站最新网址:www.biquge66.net', '')

            # Long chapters are split over two pages ("xxx_2.html");
            # fetch and append the second page when it exists.
            chapter_url2 = chapter_url.replace('.html', '_2.html')
            chapter_response2 = requests.get(chapter_url2)
            chapter_soup2 = BeautifulSoup(chapter_response2.content, 'html.parser')
            chapter_content2 = chapter_soup2.find('div', id='booktxt')
            if chapter_content2:
                chapter_text += chapter_content2.text.replace('本站最新网址:www.biquge66.net', '')

            if self.save_checkbox_var.get():
                book_dir = os.path.join(os.getcwd(), keyword)
                os.makedirs(book_dir, exist_ok=True)
                chapter_path = os.path.join(book_dir, f'{chapter_title}.txt')
                with open(chapter_path, 'w', encoding='utf-8') as f:
                    f.write(chapter_text)

            self.chapters[chapter_title] = chapter_text
            self.update_display()

def search_novel(self, source):
    """Validate the search box and launch the crawl on a background thread.

    Args:
        source: 1 or 2, selecting which book source to crawl.
    """
    keyword = self.entry.get()
    if not keyword:
        messagebox.showwarning('警告', '请输入小说名称')
        return

    worker = threading.Thread(target=self.crawl_novel, args=(keyword, source))
    worker.start()

def crawl_novel(self, keyword, source):
    """Dispatch the crawl for *keyword* to the selected book source.

    Args:
        keyword: novel title to search for.
        source: 1 for bige3.cc (Selenium), 2 for biquge66.net (requests).
    """
    # The original continued by starting four extra threads per source that
    # targeted crawl_novel_source*_thread stubs with no bodies (a syntax
    # error) — dead code that would only have re-run the same full crawl
    # four more times. That section is removed; only the dispatch remains.
    if source == 1:
        self.crawl_novel_source1(keyword)
    elif source == 2:
        self.crawl_novel_source2(keyword)

def crawl_novel_source1_thread(self, keyword, thread_id):
    """Placeholder for a sharded source-1 crawl (not implemented).

    The original declared this method with only a comment and no body,
    which is a syntax error. A real implementation would crawl the slice
    of chapters assigned to *thread_id*; until then this is an explicit
    no-op so the module can at least load.
    """
    # TODO: implement chapter-range sharding per thread_id.

def crawl_novel_source2_thread(self, keyword, thread_id):
    """Placeholder for a sharded source-2 crawl (not implemented).

    The original declared this method with only a comment and no body,
    which is a syntax error. A real implementation would crawl the slice
    of chapters assigned to *thread_id*; until then this is an explicit
    no-op so the module can at least load.
    """
    # TODO: implement chapter-range sharding per thread_id.

# Entry point: build the Tk root window and hand it to the app.
# The original read `if name == 'main'` (missing dunder underscores),
# which would raise NameError instead of guarding the script entry.
if __name__ == '__main__':
    root = tk.Tk()
    app = NovelDownloader(root)
    root.mainloop()

# 小说下载器 - 快速下载您的最爱小说
#
# 原文地址: https://www.cveoy.top/t/topic/qCbc 著作权归作者所有。请勿转载和采集!