import ftplib
import math
import os
import sys
import threading
import time
import urllib.request
from urllib.parse import unquote, urlparse

import requests
from tqdm import tqdm


class DownloadThread(threading.Thread):
    """Worker thread that downloads one inclusive byte range over HTTP.

    The range is requested with an HTTP ``Range`` header and every received
    chunk is written to ``fileobj`` at its absolute offset, so several
    workers can safely share a single file object.
    """

    # Serialises the seek+write pair on shared file objects; without it two
    # workers could interleave between one worker's seek and its write.
    _write_lock = threading.Lock()

    def __init__(self, url, start, end, fileobj, bar):
        """Store the download parameters.

        Args:
            url: HTTP(S) URL of the resource.
            start: Offset of the first byte to fetch (inclusive).
            end: Offset of the last byte to fetch (inclusive).
            fileobj: Seekable binary file object the data is written to.
            bar: Progress object exposing ``update(n)`` (e.g. a tqdm bar).
        """
        threading.Thread.__init__(self)
        self.url = url
        # Deliberately NOT stored as ``self.start``: an instance attribute
        # with that name shadows ``threading.Thread.start`` and makes the
        # thread impossible to start.
        self.range_start = start
        self.end = end
        self.fileobj = fileobj
        self.bar = bar

    def run(self):
        """Fetch the byte range and write it at the matching file offset.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        headers = {'Range': 'bytes=%d-%d' % (self.range_start, self.end)}
        offset = self.range_start
        with requests.get(self.url, headers=headers, stream=True,
                          timeout=60) as response:
            # Fail loudly instead of writing an error page into the file.
            response.raise_for_status()
            if response.status_code != 206:
                # The server ignored the Range header and is sending the
                # whole body, which therefore starts at byte 0.
                offset = 0
            for chunk in response.iter_content(chunk_size=1024):
                if not chunk:
                    break
                with self._write_lock:
                    self.fileobj.seek(offset)
                    self.fileobj.write(chunk)
                offset += len(chunk)
                self.bar.update(len(chunk))


class FTPDownloadThread(threading.Thread):
    """Worker thread that downloads one inclusive byte range over FTP.

    The worker opens its own control connection, restarts the transfer at
    ``start`` with the FTP ``REST`` command and reads until ``end`` (or until
    the server closes the data connection). Every chunk is written to
    ``fileobj`` at its absolute offset so workers can share one file object.
    """

    # Serialises the seek+write pair on shared file objects.
    _write_lock = threading.Lock()

    def __init__(self, ftp_url, start, end, fileobj, bar):
        """Store the download parameters.

        Args:
            ftp_url: URL of the form ``ftp://[user[:password]@]host[:port]/path``.
            start: Offset of the first byte to fetch (inclusive).
            end: Offset of the last byte to fetch (inclusive). Pass a value
                larger than the file to read until end of file.
            fileobj: Seekable binary file object the data is written to.
            bar: Progress object exposing ``update(n)`` (e.g. a tqdm bar).
        """
        threading.Thread.__init__(self)
        self.ftp_url = ftp_url
        # Deliberately NOT stored as ``self.start``: an instance attribute
        # with that name shadows ``threading.Thread.start`` and makes the
        # thread impossible to start.
        self.range_start = start
        self.end = end
        self.fileobj = fileobj
        self.bar = bar

    def run(self):
        """Download the configured byte range and write it to the file.

        Raises:
            ftplib.all_errors: On connection, login or transfer failures.
        """
        parts = urlparse(self.ftp_url)
        # The path is taken relative to the login directory, the same way
        # urllib resolves ftp:// URLs.
        remote_path = unquote(parts.path).lstrip('/')

        ftp = ftplib.FTP()
        try:
            # The timeout also applies to the data connection opened below.
            ftp.connect(parts.hostname, parts.port or ftplib.FTP_PORT,
                        timeout=60)
            # Empty credentials make ftplib log in anonymously.
            ftp.login(unquote(parts.username or ''),
                      unquote(parts.password or ''))
            ftp.voidcmd('TYPE I')  # binary mode, required for REST offsets

            offset = self.range_start
            remaining = self.end - self.range_start + 1
            # rest=None skips the REST command when starting at byte 0.
            with ftp.transfercmd('RETR ' + remote_path,
                                 rest=self.range_start or None) as conn:
                while remaining > 0:
                    data = conn.recv(min(1024, remaining))
                    if not data:
                        break
                    with self._write_lock:
                        self.fileobj.seek(offset)
                        self.fileobj.write(data)
                    offset += len(data)
                    remaining -= len(data)
                    self.bar.update(len(data))
        finally:
            # close() rather than quit(): the transfer is usually cut short,
            # so the server may have an abort reply pending that would make
            # QUIT raise.
            ftp.close()


def download_file(url: str, filename: str, num_threads: int = 4) -> str:
    """Download ``url`` into ``Download/<filename>`` using ranged HTTP requests.

    The file is split into ``num_threads`` contiguous, near-equal byte ranges
    and each range is fetched by its own ``DownloadThread``. When the server
    does not report a size or does not advertise byte-range support, the
    download falls back to ``download_single_thread``.

    Args:
        url: HTTP(S) URL of the file.
        filename: Name of the target file inside the ``Download`` directory
            (created in the current working directory if missing).
        num_threads: Requested number of parallel workers; clamped to the
            range 1..32 and never larger than the file size in bytes.

    Returns:
        The path of the downloaded file.

    Raises:
        ValueError: If the server reports a size of 0 bytes.
        requests.HTTPError: If the probe request returns an error status.
    """
    os.makedirs('Download', exist_ok=True)
    filename = os.path.join('Download', filename)

    # Probe request: only the headers are needed, so the streamed response is
    # closed again before any body data is pulled.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        content_length = response.headers.get('Content-Length')
        accept_ranges = response.headers.get('Accept-Ranges', '')

    if content_length is None:
        print('The file size is unknown, use single thread download.')
        download_single_thread(url=url, filename=filename)
        return filename

    file_size = int(content_length)
    # A reported size of 0 is treated as a failed size lookup.
    if file_size == 0:
        raise ValueError('Failed to get file size, please check the download URL')

    if accept_ranges.strip().lower() != 'bytes':
        # Without range support every worker would receive the whole file.
        print('The server does not support range requests, use single thread download.')
        download_single_thread(url=url, filename=filename)
        return filename

    worker_count = min(max(num_threads, 1), 32, file_size)
    part_size = -(-file_size // worker_count)  # ceiling division

    with open(filename, 'wb') as file:
        with tqdm(total=file_size, unit='B', unit_scale=True,
                  desc=os.path.basename(filename), ncols=80, position=0) as bar:
            thread_pool = []
            for first_byte in range(0, file_size, part_size):
                last_byte = min(first_byte + part_size, file_size) - 1
                thread = DownloadThread(url=url, start=first_byte, end=last_byte,
                                        fileobj=file, bar=bar)
                thread.daemon = True
                thread_pool.append(thread)

            for thread in thread_pool:
                thread.start()

            # The workers report progress themselves; just wait for them.
            for thread in thread_pool:
                thread.join()

    return filename


def download_ftp(input_str: str, filename: str, num_threads: int = 4) -> str:
    """Download an FTP URL into ``Download/<filename>``.

    The file size is probed through ``urllib``. When it is known, the file is
    split into contiguous, near-equal byte ranges, each fetched by its own
    ``FTPDownloadThread``; otherwise a single worker reads until end of file.

    Args:
        input_str: URL of the form ``ftp://[user[:password]@]host/path``.
        filename: Name of the target file inside the ``Download`` directory
            (created in the current working directory if missing).
        num_threads: Requested number of parallel workers; clamped to 1..32.
            NOTE(review): each worker opens its own FTP session, and servers
            may cap concurrent sessions per user — confirm for the target
            server before raising this.

    Returns:
        The path of the downloaded file.

    Raises:
        ValueError: If ``input_str`` is not an ``ftp://`` URL.
    """
    o = urlparse(input_str)
    # Validate before touching the filesystem or the network.
    if o.scheme != 'ftp':
        raise ValueError(f'{input_str} is not an FTP URL.')

    os.makedirs('Download', exist_ok=True)
    filename = os.path.join('Download', filename)

    ftp_url = input_str
    ftp_file_size = -1
    # Probe only: read the size header, then release the connection.
    req = urllib.request.urlopen(ftp_url)
    try:
        if 'Content-Length' in req.headers:
            ftp_file_size = int(req.headers['Content-Length'])
    finally:
        req.close()

    if ftp_file_size < 0:
        # Unknown size: one worker with an open-ended range reads to EOF.
        byte_ranges = [(0, sys.maxsize)]
        bar_total = None
    else:
        worker_count = min(max(num_threads, 1), 32)
        # Ceiling division; max() keeps the range() step valid for size 0.
        part_size = max(1, -(-ftp_file_size // worker_count))
        byte_ranges = [
            (first_byte, min(first_byte + part_size, ftp_file_size) - 1)
            for first_byte in range(0, ftp_file_size, part_size)
        ]
        bar_total = ftp_file_size

    with open(filename, 'wb') as file:
        with tqdm(total=bar_total, unit='B', unit_scale=True,
                  desc=os.path.basename(filename), ncols=80, position=0) as bar:
            thread_pool = []
            for first_byte, last_byte in byte_ranges:
                thread = FTPDownloadThread(ftp_url=ftp_url, start=first_byte,
                                           end=last_byte, fileobj=file, bar=bar)
                thread.daemon = True
                thread_pool.append(thread)

            for thread in thread_pool:
                thread.start()

            # The workers report progress themselves; just wait for them.
            for thread in thread_pool:
                thread.join()

    return filename


def download_single_thread(url: str, filename: str) -> None:
    """Stream ``url`` into ``filename`` over a single HTTP connection.

    Args:
        url: HTTP(S) URL of the file.
        filename: Path of the file to write; it is created or truncated.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=60) as response:
        # Check the status before opening the file so an error page is never
        # saved as if it were the download.
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

功能：

  • 支持通过 HTTP 和 FTP 协议下载文件
  • 多线程分段下载，提高下载速度
  • 使用 tqdm 进度条显示下载进度
  • 自动调整线程数量（限制在 1–32 之间），以优化下载速度

使用方法:

# 下载 HTTP 文件
download_file('https://example.com/file.zip', 'file.zip')

# 下载 FTP 文件
download_ftp('ftp://user:password@example.com/file.txt', 'file.txt')

注意：

  • 下载 FTP 文件时，需要在 URL 中提供正确的用户名和密码（格式：ftp://user:password@host/path）
  • 默认线程数量为 4，可以通过 num_threads 参数进行调整
  • 文件保存在当前工作目录下的 Download 文件夹中，如果该文件夹不存在则自动创建

代码改进：

  • 变量命名更具描述性
  • 添加错误处理
  • 优化代码结构，提高可读性
  • 使用 tqdm 库显示下载进度
  • 自动调整线程数量，以优化下载速度
Python 多线程文件下载器:支持 HTTP 和 FTP 协议

原文地址: https://www.cveoy.top/t/topic/oHSt 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录