# Automated download of examinee photos with Python (original title: "Python自动化下载考生照片")
import os
import time
import psutil
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.common.by import By
import openpyxl
import requests
# Reuse a Chrome instance that was already started with the remote-debugging
# port 9224; launch a new one only when no such process is found.
for proc in psutil.process_iter():
    try:
        if 'chrome' in proc.name() and '--remote-debugging-port=9224' in proc.cmdline():
            # A debuggable browser is already running; nothing to launch.
            break
    except (psutil.Error, OSError):
        # The process exited or cannot be inspected while we were looking at
        # it, so it is skipped. Only inspection errors are caught here, so
        # Ctrl+C and SystemExit still propagate.
        continue
else:
    # The loop finished without a match: start Chrome with the debugging
    # port enabled and a dedicated user-data directory.
    os.system(r'start chrome --remote-debugging-port=9224 --user-data-dir="D:\评阅用"')

# In both cases Selenium attaches to the browser through the debugger address.
options = Options()
options.add_experimental_option('debuggerAddress', '127.0.0.1:9224')
# Attach to the already-running browser and switch to the tab whose title is
# '考后核验'; every later element lookup runs against the selected tab.
driver = webdriver.Chrome(options=options)
tabs = driver.window_handles
for tab in tabs:
    driver.switch_to.window(tab)
    if driver.title == '考后核验':
        print('登陆成功')
        break
else:
    # No tab carried the expected title. The script keeps going on the tab
    # that was switched to last (unchanged behaviour), but now says so
    # instead of failing later with an unrelated "element not found" error.
    print('未找到标题为"考后核验"的标签页,将在当前标签页继续')
# Open the workbook that lists the examinees and work on its active sheet.
wb = openpyxl.load_workbook(r'C:\Users\Administrator\Desktop\考后核验.xlsx')
sheet = wb.active
# Root folder that receives the downloaded photos. makedirs(exist_ok=True)
# replaces the check-then-mkdir pair: it is safe on repeated runs and also
# creates missing parent directories.
photo_folder = r'C:\Users\Administrator\Desktop\照片'
os.makedirs(photo_folder, exist_ok=True)
# For every data row of the sheet: search the examinee on the page, find the
# result-table row whose date and room match columns K and H of the sheet,
# and save the photo as <photo_folder>/<date>/<room>/<student_id>.jpg.


def _cell_text(value):
    """Return a worksheet cell value as stripped text ('' for an empty cell)."""
    return '' if value is None else str(value).strip()


# Columns A..K are requested because H (index 7) and K (index 10) are read
# below. Asking for column A alone yields one-cell rows, so H and K would
# never be available and no row could ever match.
for row in sheet.iter_rows(min_row=2, max_row=sheet.max_row, min_col=1, max_col=11):
    student_id = row[0].value
    if not student_id:
        continue

    # NOTE(review): K is compared with the page's date column and H with the
    # room column, as in the original comparison - confirm the sheet layout.
    # Both are converted to text because the page only provides strings; K is
    # cut at the first space exactly like the page value below.
    k_value = _cell_text(row[10].value).split(' ')[0]
    h_value = _cell_text(row[7].value)

    # Search the examinee on the page.
    search_input = driver.find_element(By.XPATH, '//input[@placeholder="姓名/考生号"]')
    search_input.clear()
    search_input.send_keys(student_id)
    time.sleep(1)  # give the page time to react to the input
    search_button = driver.find_element(By.XPATH, '//button[@class="ant-btn ant-btn-primary"]')
    driver.execute_script('arguments[0].click();', search_button)
    time.sleep(1)  # give the result table time to load

    # find_elements returns an empty list instead of raising, so an examinee
    # without a result does not abort the whole run.
    photo_elements = driver.find_elements(By.XPATH, '//div[@class="img-lazy-load"]/img')
    photo_url = photo_elements[0].get_attribute('src') if photo_elements else None
    time_elements = driver.find_elements(
        By.XPATH,
        '//td[@class="ant-table-column-has-actions ant-table-column-has-sorters ant-table-column-sort" and @style="text-align: center;"]')
    room_elements = driver.find_elements(
        By.XPATH,
        '//td[@class="ant-table-column-has-actions ant-table-column-has-sorters" and @style="text-align: center;"]')

    # Look for a table row whose date and room equal the sheet values. zip
    # stops at the shorter list, so unequal list lengths cannot raise
    # IndexError. Empty sheet values never match.
    sub_folder_name = None
    if photo_url and k_value and h_value:
        for time_element, room_element in zip(time_elements, room_elements):
            time_value = time_element.text.split(' ')[0]
            room_value = room_element.text
            if time_value == k_value and room_value == h_value:
                sub_folder_name = os.path.join(photo_folder, time_value, room_value)
                break

    if sub_folder_name is None:
        print(f'未找到学号为{student_id}的考生信息')
        continue

    # Download first and create the folders only on success. The timeout keeps
    # a stalled server from hanging the run, and raise_for_status prevents an
    # HTTP error page from being saved as a .jpg.
    try:
        response = requests.get(photo_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'下载学号为{student_id}的照片失败: {exc}')
        continue

    os.makedirs(sub_folder_name, exist_ok=True)
    photo_name = os.path.join(sub_folder_name, f'{student_id}.jpg')
    with open(photo_name, 'wb') as f:
        f.write(response.content)
    print(f'下载学号为{student_id}的照片成功')
# Source: https://www.cveoy.top/t/topic/fKHS - copyright belongs to the author; do not repost or scrape.