小说下载器

这是一个使用 Python 编写的程序，用于自动下载 www.ixuanshu.net 网站上的小说，并将其保存为 TXT 文件。

# -*- coding:utf-8 -*-
import urllib.request
import urllib.error
import re
import os

webroot = 'http://www.ixuanshu.net'

# Browser-like User-Agent sent with every request.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
# Timeout, in seconds, applied to every HTTP request.
REQUEST_TIMEOUT = 180
# Index pages visited by main(): FIRST_PAGE inclusive, STOP_PAGE exclusive.
FIRST_PAGE = 20
STOP_PAGE = 220

# One match per book on an index page. Groups, in order:
# author, size, update time, relative book link, book name, note.
LIST_PATTERN = re.compile('<li>.*?<div class="s">.*?target="_blank">(.*?)</a><br />大小：(.*?)<br>.*?</em><br>更新：(.*?)</div>.*?<a href="(.*?)"><img.*?>(.*?)</a>.*?<div class="u">(.*?)</div>', re.S)
# Captures the single-quoted href of the "Txt" download button on a book page.
DOWN_LINK_PATTERN = re.compile('<a class="downButton.*?<a class="downButton" href=\'(.*?)\'.*?Txt.*?</a>', re.S)
# Extracts the text nested inside <strong><tag ...>text</tag></strong> markup.
STRONG_NAME_PATTERN = re.compile('<strong>.*?>(.*?)<.*?</strong>', re.S)


def fetch(url: str):
    """Download *url* and return the raw response body as bytes.

    On ``urllib.error.URLError`` the HTTP status code and/or reason are
    printed (when the exception carries them) and ``None`` is returned, so
    callers never touch a missing or stale response object.
    """
    request = urllib.request.Request(url, headers=HEADERS)
    try:
        with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
            return response.read()
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        return None


def parse_book_list(html: str) -> list:
    """Return one 6-tuple of strings per book found in an index page.

    Tuple order: author, size, update time, relative link, name, note.
    """
    return LIST_PATTERN.findall(html)


def find_txt_links(html: str) -> list:
    """Return every TXT download URL found in a book page (possibly empty)."""
    return DOWN_LINK_PATTERN.findall(html)


def clean_book_name(raw_name: str) -> str:
    """Return a name usable as a file name stem.

    Names wrapped in ``<strong><tag>...</tag></strong>`` markup cannot be
    used as file names, so the inner text is extracted. Any other name is
    returned unchanged.
    """
    inner = STRONG_NAME_PATTERN.findall(raw_name)
    return inner[0] if inner else raw_name


def log_error(page: int, book_name: str) -> None:
    """Append a record about a failed book to ``error.log``."""
    with open('error.log', 'a', encoding='utf-8') as fp:
        fp.write('page:' + str(page) + '\n')
        fp.write(book_name)
        fp.write('\nThere is an error in parsing process.\n\n')


def download_book(item) -> None:
    """Download the TXT file of one book described by a ``parse_book_list`` tuple.

    The text is saved as ``<book name>.txt`` (UTF-8) in the current directory.

    Raises:
        RuntimeError: the book page or the TXT file could not be fetched,
            or the book page contains no TXT download link.
        UnicodeDecodeError: a response is not in the expected encoding.
        OSError: the output file could not be written.
    """
    book_link = item[3]
    book_name = item[4]

    book_full_link = webroot + book_link  # absolute address of the book page
    raw_page = fetch(book_full_link)
    if raw_page is None:
        raise RuntimeError('cannot fetch book page: ' + book_full_link)

    down_link = find_txt_links(raw_page.decode('utf-8'))
    print(book_name)
    print(down_link)
    if not down_link:
        raise RuntimeError('no TXT download link on: ' + book_full_link)

    raw_text = fetch(down_link[0])
    if raw_text is None:
        raise RuntimeError('cannot fetch TXT file: ' + down_link[0])

    print('start download')
    # Decode before opening the output file so that a decoding failure does
    # not leave an empty .txt file behind.
    text = raw_text.decode('gbk')
    with open(clean_book_name(book_name) + '.txt', 'w', encoding='utf-8') as fp:
        fp.write(text)
    print('down finish\n')


def main() -> None:
    """Walk the index pages and download every listed book.

    A page that cannot be fetched is skipped. A book that fails for any
    reason is reported, recorded in ``error.log`` and skipped, so a single
    bad entry does not stop the run.
    """
    for page in range(FIRST_PAGE, STOP_PAGE):
        print('正在下载第' + str(page) + '页小说')

        url = 'http://www.ixuanshu.net/soft/sort02/index_' + str(page) + '.html'
        raw_index = fetch(url)
        if raw_index is None:
            continue

        for item in parse_book_list(raw_index.decode('utf-8')):
            try:
                download_book(item)
            except Exception as e:  # best-effort boundary: log and move on
                print('该条目解析出现错误，忽略')
                print(e)
                print('')
                log_error(page, item[4])


if __name__ == '__main__':
    main()

使用方法:

确保你的电脑上已经安装了 Python 3.10 或更高版本。
将代码保存为 .py 文件，例如：downloader.py
在命令行中运行：python downloader.py

注意:

该程序会从 www.ixuanshu.net 网站上下载小说，请确保你已经阅读并同意该网站的使用条款。
由于网站的结构可能会发生变化，程序可能需要进行调整才能正常工作。
程序可能会遇到一些错误，例如网络连接问题，文件写入问题等。请仔细阅读错误信息并进行相应的解决。
如果你对该程序有任何问题，请随时向我提问。