import os
import time

import openpyxl
import psutil
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Check whether a browser instance already exists; if it does, do not create another one.

# Look for a Chrome process that was started with the remote-debugging port.
# The for/else launches a new instance only when the scan ends without a match.
for proc in psutil.process_iter():
    try:
        if 'chrome' in proc.name() and '--remote-debugging-port=9224' in proc.cmdline():
            break
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        # A process may exit during the scan or refuse inspection; skip it
        # instead of hiding every possible error behind a bare except.
        continue
else:
    # No matching Chrome was found: start one with a dedicated profile folder.
    os.system(r'start chrome --remote-debugging-port=9224 --user-data-dir="D:\评阅用"')

# In both cases Selenium attaches to the instance listening on port 9224,
# so the options are built once here rather than in each branch.
options = Options()
options.add_experimental_option('debuggerAddress', '127.0.0.1:9224')

# Find the target tab inside the existing browser instance.

# Attach to the running browser and switch to the tab titled '考后核验'.
driver = webdriver.Chrome(options=options)
tabs = driver.window_handles
for tab in tabs:
    driver.switch_to.window(tab)
    if driver.title == '考后核验':
        print('登陆成功')
        break
else:
    # Previously a missing tab went unnoticed and the script carried on with
    # whichever tab happened to be last; report it so later lookup failures
    # are easier to diagnose.
    print('未找到标题为"考后核验"的标签页')

# Open the Excel workbook.

# Load the verification workbook and work on its active sheet.
workbook_path = r'C:\Users\Administrator\Desktop\考后核验.xlsx'
wb = openpyxl.load_workbook(workbook_path)
sheet = wb.active

# Create the photo output folder.

# Root folder that receives the downloaded photos.
photo_folder = r'C:\Users\Administrator\Desktop\照片'
# exist_ok=True replaces the check-then-create pair (os.path.exists followed
# by os.mkdir), which could fail if the folder appeared between the two calls.
os.makedirs(photo_folder, exist_ok=True)

# Iterate over the data in column 'A' of the sheet.

# Walk the sheet row by row (the first row is skipped) and, for every
# student ID in column A, search the page and download the photo when a
# result row matches the sheet's column K (time) and column H (room).
#
# Columns A..K are requested so that H and K are actually present. The
# earlier version asked for column A only, so each row held a single cell,
# the length checks never passed, both values stayed None and no result
# could ever match.
for record in sheet.iter_rows(
    min_row=2,
    max_row=sheet.max_row,
    min_col=1,
    max_col=11,
    values_only=True,
):
    student_id = record[0]  # column A
    h_value = record[7]  # column H, compared against the room text
    k_value = record[10]  # column K, compared against the time text
    if not student_id:
        continue

    # Search the page for this student.
    search_input = driver.find_element(By.XPATH, '//input[@placeholder="姓名/考生号"]')
    search_input.clear()
    search_input.send_keys(student_id)
    time.sleep(1)  # fixed pause so the page can react to the input
    search_button = driver.find_element(By.XPATH, '//button[@class="ant-btn ant-btn-primary"]')
    # The click is issued through JavaScript rather than a native click.
    driver.execute_script("arguments[0].click();", search_button)
    time.sleep(1)  # fixed pause so the results can load

    # Table cells holding the time and the room number of each result.
    time_elements = driver.find_elements(
        By.XPATH,
        '//td[@class="ant-table-column-has-actions ant-table-column-has-sorters ant-table-column-sort" and @style="text-align: center;"]',
    )
    room_elements = driver.find_elements(
        By.XPATH,
        '//td[@class="ant-table-column-has-actions ant-table-column-has-sorters" and @style="text-align: center;"]',
    )

    # zip stops at the shorter list, so differing lengths can no longer
    # raise IndexError the way parallel indexing did.
    for time_element, room_element in zip(time_elements, room_elements):
        # Only the text before the first space of the time cell is used.
        time_value = time_element.text.split(' ')[0]
        room_value = room_element.text

        # NOTE(review): values are compared as-is, so the sheet cells must
        # hold strings identical to the page text - confirm cell types.
        if time_value != k_value or room_value != h_value:
            continue

        # Photos are stored under <photo_folder>/<time>/<room>/.
        sub_folder_name = os.path.join(photo_folder, time_value, room_value)
        os.makedirs(sub_folder_name, exist_ok=True)

        # NOTE(review): this selects the first lazy-load image on the page,
        # not one scoped to the matched row - confirm that is intended.
        photo_element = driver.find_element(By.XPATH, '//div[@class="img-lazy-load"]/img')
        photo_url = photo_element.get_attribute("src")

        photo_name = os.path.join(sub_folder_name, f"{student_id}.jpg")
        try:
            # A timeout keeps one stalled request from hanging the whole run;
            # raise_for_status stops error pages being saved as .jpg files.
            response = requests.get(photo_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"下载学号为{student_id}的照片失败: {exc}")
            break

        with open(photo_name, "wb") as f:
            f.write(response.content)

        print(f"下载学号为{student_id}的照片成功")
        break
# Python: automated download of exam candidates' photos

# Original source: https://www.cveoy.top/t/topic/fKHN - copyright belongs to the author; do not repost or scrape.

# Free AI - click here; no registration or login required.