# 使用 Python 将数据集划分成训练集、验证集和测试集
import random
# Default file locations, per-category split sizes and RNG seed.
DATA_PATH = 'data.txt'
TRAIN_PATH = 'train_set.txt'
VAL_PATH = 'val_set.txt'
TEST_PATH = 'test_set.txt'
TRAIN_PER_CATEGORY = 1000
VAL_PER_CATEGORY = 100
SEED = 42


def group_by_category(lines):
    """Group comma-separated records by the integer label in their last field.

    Args:
        lines: iterable of text lines, one record per line.

    Returns:
        dict mapping each integer label to the list of records carrying it,
        in order of first appearance. Every record is the full list of
        string fields, label included, so it can be written back unchanged.

    Raises:
        ValueError: if the last field of a non-blank line is not an integer.
    """
    groups = {}
    for raw in lines:
        stripped = raw.strip()
        if not stripped:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record; skipping them avoids int('') raising ValueError.
            continue
        fields = stripped.split(',')
        groups.setdefault(int(fields[-1]), []).append(fields)
    return groups


def split_by_category(groups, train_size, val_size, rng):
    """Split every category into train / validation / test portions.

    Each category is shuffled independently; its first ``train_size`` records
    go to the training set, the next ``val_size`` to the validation set and
    whatever remains to the test set. The three combined sets are then
    shuffled so categories are interleaved.

    NOTE: a category with ``train_size`` records or fewer contributes nothing
    to the validation and test sets.

    Args:
        groups: dict of label -> list of records (not modified).
        train_size: number of training records taken per category.
        val_size: number of validation records taken per category.
        rng: ``random.Random`` instance used for all shuffling.

    Returns:
        Tuple ``(train, val, test)`` of record lists.

    Raises:
        ValueError: if ``train_size`` or ``val_size`` is negative.
    """
    if train_size < 0 or val_size < 0:
        raise ValueError('train_size and val_size must be non-negative')

    # Shuffle copies of every category first, then slice, so the sequence of
    # RNG calls is fixed and the caller's lists are left untouched.
    shuffled = []
    for records in groups.values():
        records = list(records)
        rng.shuffle(records)
        shuffled.append(records)

    val_end = train_size + val_size
    train, val, test = [], [], []
    for records in shuffled:
        train.extend(records[:train_size])
        val.extend(records[train_size:val_end])
        test.extend(records[val_end:])

    # Mix the categories inside each split.
    rng.shuffle(train)
    rng.shuffle(val)
    rng.shuffle(test)
    return train, val, test


def write_records(path, records):
    """Write records to ``path`` as comma-separated lines, one per record."""
    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(','.join(fields) + '\n' for fields in records)


def main(data_path=DATA_PATH, train_path=TRAIN_PATH, val_path=VAL_PATH,
         test_path=TEST_PATH, train_size=TRAIN_PER_CATEGORY,
         val_size=VAL_PER_CATEGORY, seed=SEED):
    """Read the dataset, split it per category and save the three subsets.

    The label column is kept in the output files so each subset remains a
    complete, self-describing dataset.
    """
    # Read the dataset; iterating the file streams it line by line.
    with open(data_path, 'r', encoding='utf-8') as source:
        groups = group_by_category(source)

    # A private generator gives reproducible splits without reseeding the
    # process-wide random module state.
    rng = random.Random(seed)
    train_set, val_set, test_set = split_by_category(
        groups, train_size, val_size, rng)

    # Save the subsets to their files.
    write_records(train_path, train_set)
    write_records(val_path, val_set)
    write_records(test_path, test_set)


if __name__ == '__main__':
    main()
# 原文地址: https://www.cveoy.top/t/topic/jIcU 著作权归作者所有。请勿转载和采集!