Python 多线程文件下载器:支持 HTTP 和 FTP 协议
import sys
import os
import math
import time
import requests
import urllib.request
import threading
from urllib.parse import urlparse
from tqdm import tqdm
class DownloadThread(threading.Thread):
    """Worker thread that downloads one byte range of an HTTP(S) resource.

    The inclusive range ``start``-``end`` is requested with a ``Range``
    header and written into ``fileobj`` at the matching offsets, so several
    workers can fill one shared, seekable file object.

    Args:
        url: Address of the resource to download.
        start: Offset of the first byte to fetch.
        end: Offset of the last byte to fetch (inclusive).
        fileobj: Seekable binary file object opened for writing.
        bar: Progress object; ``bar.update(n)`` is called after each chunk.
    """

    # Shared by every worker: seek() followed by write() on a common file
    # object must not be interleaved between threads.
    _write_lock = threading.Lock()

    def __init__(self, url, start, end, fileobj, bar):
        super().__init__()
        self.url = url
        # Must not be stored as ``self.start``: an instance attribute of that
        # name hides Thread.start() and the thread can no longer be launched.
        self.range_start = start
        self.range_end = end
        self.end = end  # original attribute name, kept for existing readers
        self.fileobj = fileobj
        self.bar = bar

    def run(self):
        """Fetch the configured range and write it at its file offset.

        Raises:
            requests.HTTPError: The server answered with an error status.
            RuntimeError: The server ignored the ``Range`` header for a
                range that does not begin at offset 0.
        """
        headers = {'Range': 'bytes=%d-%d' % (self.range_start, self.range_end)}
        offset = self.range_start
        with requests.get(self.url, headers=headers, stream=True) as response:
            response.raise_for_status()
            # A 200 answer carries the whole body; writing it at a non-zero
            # offset would corrupt the output file.
            if response.status_code != 206 and self.range_start > 0:
                raise RuntimeError(
                    'Server ignored the Range header for %s' % self.url)
            for chunk in response.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                with self._write_lock:
                    self.fileobj.seek(offset)
                    self.fileobj.write(chunk)
                offset += len(chunk)
                self.bar.update(len(chunk))
class FTPDownloadThread(threading.Thread):
    """Worker thread that downloads one byte range of a file over FTP.

    Each worker opens its own FTP connection, asks the server to resume at
    ``start`` (``REST``), reads up to ``end`` (inclusive) and writes the data
    into ``fileobj`` at the matching offsets.

    Args:
        ftp_url: ``ftp://[user[:password]@]host[:port]/path`` URL.
        start: Offset of the first byte to fetch.
        end: Offset of the last byte to fetch (inclusive). A value past the
            end of the file simply reads until the server closes the stream.
        fileobj: Seekable binary file object opened for writing.
        bar: Progress object; ``bar.update(n)`` is called after each chunk.
    """

    # Shared by every worker: seek() followed by write() on a common file
    # object must not be interleaved between threads.
    _write_lock = threading.Lock()

    def __init__(self, ftp_url, start, end, fileobj, bar):
        super().__init__()
        self.ftp_url = ftp_url
        # Must not be stored as ``self.start``: an instance attribute of that
        # name hides Thread.start() and the thread can no longer be launched.
        self.range_start = start
        self.range_end = end
        self.end = end  # original attribute name, kept for existing readers
        self.fileobj = fileobj
        self.bar = bar

    def run(self):
        """Fetch the configured range and write it at its file offset.

        Raises:
            ftplib.Error: The server rejected the login or a command.
            OSError: The connection could not be made or broke.
        """
        import ftplib
        from urllib.parse import unquote, urlparse

        parts = urlparse(self.ftp_url)
        # FTP URL paths are relative to the login directory (RFC 1738), so
        # the separator that follows the host is not part of the path.
        remote_path = unquote(parts.path)
        if remote_path.startswith('/'):
            remote_path = remote_path[1:]

        offset = self.range_start
        remaining = self.range_end - self.range_start + 1
        ftp = ftplib.FTP()
        try:
            ftp.connect(parts.hostname, parts.port or 0, timeout=60)
            # Empty credentials make ftplib log in anonymously.
            ftp.login(unquote(parts.username or ''),
                      unquote(parts.password or ''))
            ftp.voidcmd('TYPE I')  # binary mode, required for byte offsets
            # ``rest`` sends REST <offset>; skip it for offset 0.
            data_conn = ftp.transfercmd('RETR ' + remote_path,
                                        rest=offset or None)
            try:
                while remaining > 0:
                    data = data_conn.recv(min(1024, remaining))
                    if not data:
                        break
                    with self._write_lock:
                        self.fileobj.seek(offset)
                        self.fileobj.write(data)
                    offset += len(data)
                    remaining -= len(data)
                    self.bar.update(len(data))
            finally:
                data_conn.close()
        finally:
            # close() instead of quit(): after a transfer that was cut short
            # the pending server reply may be an error code.
            ftp.close()
def _fetch_http_range(url, start, end, path, bar, errors):
    """Download bytes ``start``-``end`` (inclusive) of *url* into *path*.

    Runs inside a worker thread. The worker opens its own handle on *path*,
    so no file position is shared between threads. Any exception is appended
    to *errors* for the caller to re-raise.
    """
    try:
        remaining = end - start + 1
        headers = {'Range': 'bytes=%d-%d' % (start, end)}
        with requests.get(url, headers=headers, stream=True) as response:
            response.raise_for_status()
            # A 200 answer carries the whole body; writing it at a non-zero
            # offset would corrupt the output file.
            if response.status_code != 206 and start > 0:
                raise RuntimeError(
                    'Server ignored the Range header for %s' % url)
            with open(path, 'r+b') as part:
                part.seek(start)
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    if remaining <= 0:
                        break
                    # Never write past this worker's own range.
                    chunk = chunk[:remaining]
                    if chunk:
                        part.write(chunk)
                        bar.update(len(chunk))
                        remaining -= len(chunk)
        if remaining > 0:
            raise OSError(
                'Connection closed %d bytes before the end of range %d-%d'
                % (remaining, start, end))
    except Exception as exc:  # thread boundary: hand over to the caller
        errors.append(exc)


def download_file(url: str, filename: str, num_threads: int = 4) -> str:
    """Download *url* into ``Download/<filename>`` using several threads.

    The file size is read from ``Content-Length``. The file is then split
    into equal byte ranges, one per worker thread. Without a
    ``Content-Length`` header the download falls back to
    ``download_single_thread``.

    Args:
        url: HTTP(S) address of the file.
        filename: Name of the file to create inside the ``Download`` folder.
        num_threads: Requested number of workers. It is clamped to 1..32 and
            reduced to 1 when the server does not advertise
            ``Accept-Ranges: bytes``.

    Returns:
        Path of the downloaded file.

    Raises:
        ValueError: The server reports a size of 0.
        requests.HTTPError: The server answered with an error status.
        Exception: The first error raised inside a worker thread.
    """
    os.makedirs('Download', exist_ok=True)
    filename = os.path.join('Download', filename)

    # Probe request: only the headers are needed, so the body is not read.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        size_header = response.headers.get('Content-Length')
        accept_ranges = response.headers.get('Accept-Ranges', '')

    if size_header is None:
        print('The file size is unknown, use single thread download.')
        download_single_thread(url=url, filename=filename)
        return filename

    file_size = int(size_header)
    if file_size == 0:
        raise ValueError('Failed to get file size, please check the download URL')

    worker_count = min(max(num_threads, 1), 32)
    if accept_ranges.lower() != 'bytes':
        worker_count = 1
    part_size = math.ceil(file_size / worker_count)
    ranges = [(first, min(first + part_size, file_size) - 1)
              for first in range(0, file_size, part_size)]

    # Create the file at full size so every worker can open it in 'r+b' mode
    # and seek straight to its own offset.
    with open(filename, 'wb') as file:
        file.truncate(file_size)

    errors = []
    with tqdm(total=file_size, unit='B', unit_scale=True,
              desc=os.path.basename(filename), ncols=80, position=0) as bar:
        workers = [
            threading.Thread(
                target=_fetch_http_range,
                args=(url, first, last, filename, bar, errors),
                daemon=True,
            )
            for first, last in ranges
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    if errors:
        raise errors[0]
    return filename
def _ftp_login(url_parts, timeout=60):
    """Return a logged-in ``ftplib.FTP`` connection in binary mode.

    *url_parts* is the ``urlparse`` result of an FTP URL. Missing
    credentials result in an anonymous login.
    """
    import ftplib
    from urllib.parse import unquote

    ftp = ftplib.FTP()
    try:
        ftp.connect(url_parts.hostname, url_parts.port or 0, timeout=timeout)
        ftp.login(unquote(url_parts.username or ''),
                  unquote(url_parts.password or ''))
        ftp.voidcmd('TYPE I')  # binary mode, needed for SIZE and REST
    except Exception:
        ftp.close()
        raise
    return ftp


def _fetch_ftp_range(url_parts, remote_path, start, end, path, bar, errors):
    """Download bytes ``start``-``end`` (inclusive) of a remote file.

    Runs inside a worker thread with its own FTP connection and its own
    handle on *path*. ``end=None`` means "read until the server closes the
    stream". Any exception is appended to *errors* for the caller.
    """
    try:
        remaining = None if end is None else end - start + 1
        ftp = _ftp_login(url_parts)
        try:
            # ``rest`` sends REST <offset>; skip it for offset 0.
            data_conn = ftp.transfercmd('RETR ' + remote_path,
                                        rest=start or None)
            try:
                with open(path, 'r+b') as part:
                    part.seek(start)
                    while remaining is None or remaining > 0:
                        wanted = 8192 if remaining is None else min(8192, remaining)
                        data = data_conn.recv(wanted)
                        if not data:
                            break
                        part.write(data)
                        bar.update(len(data))
                        if remaining is not None:
                            remaining -= len(data)
            finally:
                data_conn.close()
        finally:
            # close() instead of quit(): after a transfer that was cut short
            # the pending server reply may be an error code.
            ftp.close()
        if remaining:
            raise OSError(
                'Connection closed %d bytes before the end of range %d-%d'
                % (remaining, start, end))
    except Exception as exc:  # thread boundary: hand over to the caller
        errors.append(exc)


def download_ftp(input_str: str, filename: str, num_threads: int = 4) -> str:
    """Download an FTP URL into ``Download/<filename>`` using several threads.

    The remote size is requested with ``SIZE``. When it is known, the file is
    split into equal byte ranges, one per worker thread. When the server
    cannot report a size, the file is fetched as a single stream.

    Args:
        input_str: ``ftp://[user[:password]@]host[:port]/path`` URL.
        filename: Name of the file to create inside the ``Download`` folder.
        num_threads: Requested number of workers, clamped to 1..32.

    Returns:
        Path of the downloaded file.

    Raises:
        ValueError: *input_str* is not an FTP URL or has no host.
        Exception: The first error raised inside a worker thread.
    """
    import ftplib
    from urllib.parse import unquote

    o = urlparse(input_str)
    if o.scheme != 'ftp':
        raise ValueError(f'{input_str} is not an FTP URL.')
    if not o.hostname:
        raise ValueError(f'{input_str} does not contain a host name.')

    # FTP URL paths are relative to the login directory (RFC 1738), so the
    # separator that follows the host is not part of the path.
    remote_path = unquote(o.path)
    if remote_path.startswith('/'):
        remote_path = remote_path[1:]

    os.makedirs('Download', exist_ok=True)
    filename = os.path.join('Download', filename)

    with _ftp_login(o) as ftp:
        try:
            ftp_file_size = ftp.size(remote_path)
        except ftplib.error_perm:
            # SIZE is not supported by every server.
            ftp_file_size = None

    if ftp_file_size:
        worker_count = min(max(num_threads, 1), 32)
        part_size = math.ceil(ftp_file_size / worker_count)
        ranges = [(first, min(first + part_size, ftp_file_size) - 1)
                  for first in range(0, ftp_file_size, part_size)]
    else:
        # Unknown (or zero) size: one worker reads the whole stream.
        ftp_file_size = None
        ranges = [(0, None)]

    # Create the file up front so every worker can open it in 'r+b' mode and
    # seek straight to its own offset.
    with open(filename, 'wb') as file:
        if ftp_file_size:
            file.truncate(ftp_file_size)

    errors = []
    with tqdm(total=ftp_file_size, unit='B', unit_scale=True,
              desc=os.path.basename(filename), ncols=80, position=0) as bar:
        workers = [
            threading.Thread(
                target=_fetch_ftp_range,
                args=(o, remote_path, first, last, filename, bar, errors),
                daemon=True,
            )
            for first, last in ranges
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    if errors:
        raise errors[0]
    return filename
def download_single_thread(url: str, filename: str) -> None:
    """Stream *url* into the file *filename* over a single connection.

    Args:
        url: HTTP(S) address of the file.
        filename: Path of the local file to (over)write.

    Raises:
        requests.HTTPError: The server answered with an error status, so an
            error page is never saved as if it were the requested file.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
功能:
- 支持 HTTP 和 FTP 协议下载文件
- 多线程下载,提高下载速度
- 使用进度条显示下载进度
- 自动调整线程数量,以优化下载速度
使用方法:
# 下载 HTTP 文件
download_file('https://example.com/file.zip', 'file.zip')
# 下载 FTP 文件
download_ftp('ftp://user:password@example.com/file.txt', 'file.txt')
注意:
- 下载 FTP 文件需要提供正确的用户名和密码
- 默认线程数量为 4,可以根据需要进行调整
- 下载路径为 Download 文件夹,如果不存在则自动创建
代码改进:
- 变量命名更具描述性
- 添加错误处理
- 优化代码结构,提高可读性
- 使用 tqdm 库显示下载进度
- 自动调整线程数量,以优化下载速度
原文地址: https://www.cveoy.top/t/topic/oHSt 著作权归作者所有。请勿转载和采集!