import os
import time
from datetime import datetime
from urllib.parse import urljoin, urlparse

import openpyxl
import psutil
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Look for a Chrome process that was started with the remote-debugging port;
# if none is running, launch one. Either way the driver attaches to the same
# debugger address, so the Options object is built once after the scan.
for proc in psutil.process_iter():
    try:
        if 'chrome' in proc.name() and '--remote-debugging-port=9224' in proc.cmdline():
            break
    except psutil.Error:
        # psutil raises NoSuchProcess / AccessDenied / ZombieProcess (all
        # psutil.Error subclasses) for processes that exit mid-scan or cannot
        # be inspected; those are skipped. Anything else is a real error and
        # is no longer swallowed.
        continue
else:
    # No matching process was found: start Chrome with the debugging port and
    # a dedicated user-data directory.
    os.system(r'start chrome --remote-debugging-port=9224 --user-data-dir="D:\评阅用"')

options = Options()
options.add_experimental_option('debuggerAddress', '127.0.0.1:9224')

# Attach to the Chrome instance behind the debugger address and look for the
# tab whose title is '考后核验'; the driver stays on that tab afterwards.
driver = webdriver.Chrome(options=options)
tabs = driver.window_handles
for tab in tabs:
    driver.switch_to.window(tab)
    if driver.title == '考后核验':
        print('登陆成功')
        break
else:
    # No tab matched: the driver is left on whichever tab it switched to last.
    # Report it here so that a later element lookup failure has some context.
    print('未找到标题为"考后核验"的标签页')

# Load the verification workbook from the desktop and work on its active sheet.
wb = openpyxl.load_workbook(r'C:\Users\Administrator\Desktop\考后核验.xlsx')
sheet = wb.active

# Make sure the photo root folder exists. exist_ok=True makes this a no-op when
# the folder is already there, replacing the separate check-then-create step.
os.makedirs(r'C:\Users\Administrator\Desktop\照片', exist_ok=True)

# Photos are grouped by the time value in column K: every distinct time gets
# its own sub-folder under the photo root, and each student's photos are saved
# into the folder that belongs to the time on that student's own row.
PHOTO_ROOT = r'C:\Users\Administrator\Desktop\照片'


def _time_folder_name(value):
    """Return the sub-folder name for a column-K time value.

    Accepts a ``datetime`` (which openpyxl may return for date-formatted
    cells) or a ``'%Y-%m-%d %H:%M:%S'`` string. The colons of the time part
    are replaced by dashes because ':' is not allowed in Windows file names.

    Raises:
        ValueError: if a string value does not match the expected format.
    """
    if isinstance(value, datetime):
        return value.strftime('%Y-%m-%d %H-%M-%S')
    parsed = time.strptime(str(value).strip(), '%Y-%m-%d %H:%M:%S')
    return time.strftime('%Y-%m-%d %H-%M-%S', parsed)


# Collect the distinct non-empty values of column K (column 11), skipping the
# header row.
time_set = {
    cell.value
    for row in sheet.iter_rows(min_row=2, max_row=sheet.max_row, min_col=11, max_col=11)
    for cell in row
    if cell.value
}

# Create one sub-folder per distinct time.
for data in time_set:
    os.makedirs(os.path.join(PHOTO_ROOT, _time_folder_name(data)), exist_ok=True)

# Walk the rows once, reading the student id (column A) and the time (column K)
# together so that every download goes to the folder matching its own row.
for row in sheet.iter_rows(min_row=2, max_row=sheet.max_row, min_col=1, max_col=11):
    student_id = row[0].value
    if not student_id:
        continue

    # Rows without a time value fall back to the photo root itself.
    exam_time = row[10].value
    if exam_time:
        folder_path = os.path.join(PHOTO_ROOT, _time_folder_name(exam_time))
    else:
        folder_path = PHOTO_ROOT
    os.makedirs(folder_path, exist_ok=True)

    # Query the page for this student.
    search_input = driver.find_element(By.XPATH, '//input[@placeholder="姓名/考生号"]')
    search_input.clear()
    search_input.send_keys(student_id)
    time.sleep(1)  # give the page time to react to the input
    search_button = driver.find_element(By.XPATH, '//button[@class="ant-btn ant-btn-primary"]')
    driver.execute_script('arguments[0].click();', search_button)
    time.sleep(1)  # give the search results time to load

    # Download every registration photo shown for this student.
    for img_element in driver.find_elements(By.XPATH, '//img[@title="报名照片"]'):
        img_src = img_element.get_attribute('src')
        if not img_src:
            continue

        # urljoin leaves an already-absolute src untouched and resolves a
        # relative one against the current page URL; plain concatenation with
        # a fixed host yields a broken URL whenever src is absolute.
        img_url = urljoin(driver.current_url, img_src)

        # Name the file after the last path segment only: a query string
        # would put '?' into the file name, which Windows rejects.
        img_name = os.path.basename(urlparse(img_url).path) or f'{student_id}.jpg'

        try:
            response = requests.get(img_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as exc:
            # One failed photo should not abort the whole batch, and an error
            # page must not be written to disk as if it were an image.
            print(f'图片下载失败: {img_url} ({exc})')
            continue

        with open(os.path.join(folder_path, img_name), 'wb') as f:
            f.write(response.content)

# Write the workbook back to the desktop file.
workbook_path = r'C:\Users\Administrator\Desktop\考后核验.xlsx'
wb.save(workbook_path)
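# Python + Selenium automation script: post-exam information verification and photo classification
#
# Original source: http://www.cveoy.top/t/topic/fJvo  Copyright belongs to the author. Do not repost or scrape!
#
# Free AI, click here - no registration or login required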