Python 代码:检查并提取两个文件夹中的重复视频
下面是一段 Python 代码,用于找出第二个文件夹中与第一个文件夹内视频内容相同的重复视频,并将这些重复视频复制到单独的输出文件夹:
import os
import hashlib
import shutil
def get_file_hash(file_path):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks so that large video files are
    never held in memory in one piece.

    Args:
        file_path: Path of the file to hash.

    Returns:
        The MD5 digest of the file's bytes as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024  # 1 MiB per read keeps memory use flat
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel stops on the empty bytes object that
        # read() returns at end of file.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
def _iter_video_files(folder):
    """Yield the path of each video file found anywhere below *folder*."""
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
    for root, _dirs, files in os.walk(folder):
        for name in files:
            if name.lower().endswith(video_extensions):
                yield os.path.join(root, name)


def find_duplicate_videos(folder1, folder2):
    """Find the videos in *folder2* whose content also exists in *folder1*.

    Both folders are walked recursively. Only files ending in .mp4, .avi,
    .mov or .mkv (case-insensitive) are considered, and two files count as
    duplicates when get_file_hash() returns the same value for both.

    Args:
        folder1: Reference folder whose videos are hashed first.
        folder2: Folder whose videos are checked against the reference.

    Returns:
        A list of paths (as produced by joining the walked directory and
        the file name) of the *folder2* videos that duplicate a *folder1*
        video. A file is never reported as a duplicate of itself, which
        matters when the two folders are the same or nested.
    """
    # Content hash -> resolved paths of the folder1 files with that content.
    folder1_videos = {}
    for file_path in _iter_video_files(folder1):
        file_hash = get_file_hash(file_path)
        folder1_videos.setdefault(file_hash, set()).add(
            os.path.realpath(file_path))

    duplicate_videos = []
    for file_path in _iter_video_files(folder2):
        matches = folder1_videos.get(get_file_hash(file_path))
        if not matches:
            continue
        # If the only folder1 file with this content is this very file,
        # it is not a duplicate; any other path means a real second copy.
        if matches != {os.path.realpath(file_path)}:
            duplicate_videos.append(file_path)
    return duplicate_videos
def extract_duplicate_videos(duplicate_videos, output_folder):
    """Copy each file in *duplicate_videos* into *output_folder*.

    The output folder is created if it does not exist. Files are copied
    with shutil.copy2 and keep their base name. When two different source
    files in the same call share a base name, the later one gets a numeric
    suffix (``clip.mp4``, ``clip_1.mp4``, ...) instead of overwriting the
    earlier copy.

    Args:
        duplicate_videos: Iterable of paths of the files to copy.
        output_folder: Directory that receives the copies.

    Returns:
        None.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Lower-cased output name -> source path that claimed it in this call.
    # Lower-casing keeps names distinct on case-insensitive file systems.
    claimed = {}
    for file_path in duplicate_videos:
        file_name = os.path.basename(file_path)
        stem, ext = os.path.splitext(file_name)
        candidate = file_name
        counter = 0
        # Only a *different* source forces a new name; the same source
        # listed twice simply rewrites its own copy.
        while claimed.get(candidate.lower(), file_path) != file_path:
            counter += 1
            candidate = '{}_{}{}'.format(stem, counter, ext)
        claimed[candidate.lower()] = file_path
        shutil.copy2(file_path, os.path.join(output_folder, candidate))
# Example usage. The guard keeps the scan and copy from running when this
# module is imported only for its functions.
if __name__ == '__main__':
    # Replace these placeholder paths before running the script.
    folder1 = 'path/to/folder1'
    folder2 = 'path/to/folder2'
    output_folder = 'path/to/output_folder'

    duplicate_videos = find_duplicate_videos(folder1, folder2)
    extract_duplicate_videos(duplicate_videos, output_folder)
请将 folder1 和 folder2 替换为你要检查的两个文件夹路径,将 output_folder 替换为提取重复视频的输出文件夹路径。代码按文件内容的 MD5 值判断是否重复,只检查扩展名为 .mp4、.avi、.mov、.mkv 的文件;重复的视频会被复制(而不是移动)到输出文件夹。
原文地址: https://www.cveoy.top/t/topic/pvZI 著作权归作者所有。请勿转载和采集!