Click Download File Function for Efficient File Downloading
def click_download_file(self, urn, path):
    """Click every attachment download link and collect the downloaded files.

    Each link returned by ``self.browser.safe_get_link`` is clicked up to
    three times. The first attempt waits ``download_time_min`` seconds and
    every later attempt waits ``download_time_max`` seconds before
    ``self.download_file_is_successfully`` is asked whether the file arrived.

    Args:
        urn: Identifier written to the log messages and passed on to
            ``self.file_title_data`` and ``self.modify_file``.
        path: Directory that is checked for the downloaded files.

    Returns:
        Whatever ``self.modify_file`` returns for the list of successfully
        downloaded file titles.

    Raises:
        FileException: If at least one file could not be downloaded after
            all attempts; ``file_list`` carries the failed titles.
    """
    max_attempts = 3
    downloaded_files_success = []  # Titles of files that were downloaded
    downloaded_files_failed = []  # Titles of files that never arrived

    links = self.browser.safe_get_link(By.CSS_SELECTOR, ATTACHMENT_VIEW_WINDOW_ELEMENT)
    for link in links:
        # File name, wait times and size are derived from the link itself.
        file_title, download_time_min, download_time_max, file_size = \
            self.file_title_data(link, urn)

        for attempt in range(max_attempts):
            # Short wait on the first try, the longer wait on every retry.
            download_time = download_time_min if attempt == 0 else download_time_max

            self.browser.click_with_link(link)
            # Per the original comment this deals with the "file download is
            # too large" alert; its behavior is defined outside this block.
            self.download_alert()
            logger.info(
                '\n>> 【urn = {}】文件名{}文件大小{},下载时间{}'.format(
                    urn, file_title, file_size, download_time))
            time.sleep(download_time)  # Give the browser time to download

            if self.download_file_is_successfully(path, file_title, download_time):
                downloaded_files_success.append(file_title)
                break
            logger.info('\n>> 【urn = {}】文件{}重试下载失败'.format(urn, file_title))
        else:
            # The loop ran out of attempts without a successful download.
            downloaded_files_failed.append(file_title)

    if downloaded_files_failed:
        logger.info('\n>> 【urn = {}】下载失败文件列表:{}'.format(urn, downloaded_files_failed))
        raise FileException(message=ErrorMsg.NO_DOWNLOADABLE_FILES,
                            file_list=downloaded_files_failed)

    if downloaded_files_success:
        logger.info('\n>> 【urn = {}】下载成功文件列表:{}'.format(urn, downloaded_files_success))

    # NOTE(review): the original indentation was lost, so it is unclear whether
    # this call was nested under the `if` above. It is made unconditionally here
    # so that a return value always exists - confirm against the real file.
    new_file_title = self.modify_file(path, urn, downloaded_files_success)
    return new_file_title
def download_file_is_successfully(cls, path, file_name, download_time):
    """Report whether ``file_name`` has finished downloading into ``path``.

    While a ``<file_name>.crdownload`` partial file exists, its size is
    sampled and compared with the previous sample. If the size did not grow,
    the download is treated as stalled and ``False`` is returned; otherwise
    the function sleeps ``download_time`` seconds and samples again. Once the
    partial file is gone, the result is whether the final file exists.

    Args:
        path: Directory the file is downloaded into.
        file_name: Name of the finished file (without ``.crdownload``).
        download_time: Seconds to sleep between two size samples.

    Returns:
        ``True`` if the finished file exists and no stalled partial file was
        seen, ``False`` otherwise.
    """
    file_path = os.path.join(path, file_name)
    partial_path = file_path + '.crdownload'

    last_size = 0
    while os.path.exists(partial_path):
        try:
            cur_size = os.path.getsize(partial_path)
        except OSError:
            # The partial file disappeared between the existence check and
            # the size read; stop polling and check for the final file
            # instead of crashing.
            break
        if cur_size <= last_size:
            # No growth since the previous sample: the download has stalled.
            return False
        last_size = cur_size
        time.sleep(download_time)

    return os.path.exists(file_path)
def file_title_data(self, link, urn):
    """Extract file name, size and download wait times from a link's title.

    The ``title`` attribute is expected to look like
    ``[Original File] 230324043.eml (14159) kB``.

    Args:
        link: Element exposing ``get_attribute('title')``.
        urn: Identifier passed to ``QueryResult.update`` when no file name
            can be extracted from the title.

    Returns:
        A tuple ``(file_title, download_time_min, download_time_max,
        file_size)``. The two wait times come from
        ``self.calculate_download_time_limit(file_size)``, with the minimum
        capped at 60.

    Raises:
        IndexError: If neither ``REG`` nor ``CHINE_REG`` matches the title
            (raised after ``QueryResult.update`` has been called), or if the
            title contains no parenthesised size.
        ValueError: If the parenthesised size is not an integer.
    """
    title = link.get_attribute('title')

    # Try the primary pattern first, then the alternative one.
    file_titles = re.findall(REG, title) or re.findall(CHINE_REG, title)
    if not file_titles:
        QueryResult.update(UPDATE_INVOICES_SQL, (generate_utils.get_strtime(), urn))
        # The original fell through to ``file_titles[0]`` here and failed
        # with a context-free IndexError; keep the type, add the context.
        raise IndexError(
            'no file name found in attachment title {!r} (urn = {})'.format(title, urn))
    file_title = file_titles[0].strip()  # File name

    # The size is the trailing "(digits)" group. Taking the last all-digit
    # group keeps file names that contain parentheses themselves, such as
    # "copy (1).pdf (123) kB", from being mistaken for the size.
    numeric_groups = re.findall(r'\(\s*(\d+)\s*\)', title)
    if numeric_groups:
        file_size = int(numeric_groups[-1])
    else:
        # Fall back to the original parsing so unusual titles behave (and
        # fail) exactly as before.
        file_size = int(title.split('(')[1].split(')')[0].strip())

    download_time_min, download_time_max = self.calculate_download_time_limit(file_size)
    download_time_min = min(download_time_min, 60)  # Cap the first wait at 60
    return file_title, download_time_min, download_time_max, file_size
原文地址: https://www.cveoy.top/t/topic/qhKT 著作权归作者所有。请勿转载和采集!