import random

# Per-class quotas: for every label, the first TRAIN_PER_CLASS samples
# encountered go to the training set, the next VALIDATION_PER_CLASS to the
# validation set and the next TEST_PER_CLASS to the test set. Any further
# samples of that label are discarded.
TRAIN_PER_CLASS = 1000
VALIDATION_PER_CLASS = 100
TEST_PER_CLASS = 100


def _read_samples(path):
    """Read comma-separated samples from the text file at *path*.

    Each non-blank line becomes one sample: the line is stripped of
    surrounding whitespace and split on commas. Blank lines (for example a
    trailing empty line) are skipped, because they would otherwise produce a
    sample whose label field is empty.

    Returns:
        A list of samples, each a list of string fields. The last field of a
        sample is treated as its class label by the rest of the script.
    """
    samples = []
    with open(path, 'r') as file:
        for line in file:
            stripped = line.strip()
            if stripped:
                samples.append(stripped.split(','))
    return samples


def _split_by_class(data):
    """Distribute *data* over train/validation/test sets using per-class quotas.

    Samples are consumed in the order given. For each label (the integer
    value of the sample's last field) the quotas defined by TRAIN_PER_CLASS,
    VALIDATION_PER_CLASS and TEST_PER_CLASS are filled in that order; samples
    beyond the combined quota are dropped.

    Returns:
        A ``(train_set, validation_set, test_set)`` tuple of sample lists.

    Raises:
        ValueError: if a sample's last field cannot be parsed as an integer.
    """
    train_end = TRAIN_PER_CLASS
    validation_end = train_end + VALIDATION_PER_CLASS
    test_end = validation_end + TEST_PER_CLASS

    train_set = []
    validation_set = []
    test_set = []
    # Counts are keyed by label instead of stored in a fixed 8-slot list, so
    # any integer label (0-based, 1-based, ...) is counted separately and no
    # label can index out of range.
    class_counts = {}

    for sample in data:
        label = int(sample[-1])
        seen = class_counts.get(label, 0)
        if seen < train_end:
            train_set.append(sample)
        elif seen < validation_end:
            validation_set.append(sample)
        elif seen < test_end:
            test_set.append(sample)
        else:
            # Quota for this class is exhausted; the sample is not used.
            continue
        class_counts[label] = seen + 1

    return train_set, validation_set, test_set


def _write_samples(path, samples):
    """Write *samples* to *path*, one comma-joined sample per line."""
    with open(path, 'w') as file:
        file.writelines(','.join(sample) + '\n' for sample in samples)


def main():
    """Split 'dataset.txt' into train/validation/test files.

    Reads 'dataset.txt' from the current directory and writes
    'train_set.txt', 'validation_set.txt' and 'test_set.txt' next to it.
    """
    # Read the dataset from the txt file.
    data = _read_samples('dataset.txt')

    # Shuffle before splitting so that each class's samples are drawn at
    # random rather than taken in file order.
    random.shuffle(data)

    # Split the data into train, validation and test sets for each class.
    train_set, validation_set, test_set = _split_by_class(data)

    # Shuffle the train, validation and test sets independently.
    random.shuffle(train_set)
    random.shuffle(validation_set)
    random.shuffle(test_set)

    # Save each set to its own txt file.
    _write_samples('train_set.txt', train_set)
    _write_samples('validation_set.txt', validation_set)
    _write_samples('test_set.txt', test_set)


if __name__ == '__main__':
    main()

# 7718323310103103100252142194002100321234771832331010310310025219080201021008649800872802731100701830903103002512390802010010101007
# 在一txt文件夹中有如上格式的数据集，每条样本前23位为特征值，最后1位为类别标签，标签共有8个类别。在这个数据集中，8个类别的数据每个类别分别随机取