以下源代码为什么报了如下错误?请给出修改后的代码。源代码:import os import numpy as np import pandas as pd import paddle import paddle.vision.transforms as T from PIL import Image # 数据列表写入txt文件 def data_to_txt(datas, save_path): with open(save…
错误提示为"All arrays must be of the same length",意思是所有的数组必须拥有相同的长度。在这里,出现这个错误是因为names和new_results数组的长度不一样。
修改代码如下:
- 在构造测试集数据列表时,将图片名称也加入到test_datas数组中;
- 修改数据集的构造:如果是测试集,只将图片的绝对路径添加到data数组中,不再添加标签。
修改后的代码如下所示:
import os import numpy as np import pandas as pd import paddle import paddle.vision.transforms as T from PIL import Image
数据列表写入.txt文件
def data_to_txt(datas, save_path):
    """Write every item of ``datas`` to a text file, one item per line.

    Args:
        datas: Iterable of items; each one is rendered through an f-string
            and followed by a newline.
        save_path: Path of the text file; it is created or overwritten.
    """
    with open(save_path, 'w') as f:
        # One batched call instead of many tiny writes.
        f.writelines(f'{item}\n' for item in datas)
构造带标签的数据列表
# Build the labelled sample list. Every non-empty line of train_list.txt is
# prefixed with the training-image directory, so each entry reads
# "<absolute image path> <label>" exactly as laid out in the list file.
datas_with_label = []
with open('/kaggle/input/sxcattxt/train_list.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # A blank line would produce an entry without a label.
            continue
        datas_with_label.append(f'/kaggle/input/shisuncat/cat_12_train/{line}')
打乱带标签的数据列表
# Shuffle the labelled entries in place so the train/validation split that
# follows is random (no seed is set in the visible code).
np.random.shuffle(datas_with_label)
按照8:2划分训练集和验证集
# 8:2 split: the first two tenths of the shuffled list become the validation
# set and the remainder the training set. (`// 10 * 2`, not `// 102`.)
num_val = len(datas_with_label) // 10 * 2
train_datas = datas_with_label[num_val:]
val_datas = datas_with_label[:num_val]
print('train_datas len:', len(train_datas))
print('val_datas len:', len(val_datas))
写入train.txt、val.txt文件
# Persist both splits as list files, one entry per line.
data_to_txt(train_datas, '/kaggle/working/train.txt')
data_to_txt(val_datas, '/kaggle/working/val.txt')
构造测试集数据列表
# Build the test list. Entries are interleaved: for every image the absolute
# path is appended first and the bare file name right after it, so the list
# holds two entries per image.
test_datas = []
test_dir = '/kaggle/input/shisuncat/cat_12_test/cat_12_test'
# os.listdir returns names in arbitrary order; sorting makes runs reproducible.
for i in sorted(os.listdir(test_dir)):
    test_datas.append(os.path.join(test_dir, i))
    # Keep the image name next to its path.
    test_datas.append(i)
print('test_datas len:', len(test_datas))
写入test.txt
# Persist the interleaved path/name test list, one entry per line.
data_to_txt(test_datas, '/kaggle/working/test.txt')
构造数据集
class CatDataset(paddle.io.Dataset):
    """Image dataset backed by a list file with one entry per line.

    In 'train' and 'val' mode each line is split on a tab and stored as
    ``[image_path, label]``. In 'test' mode every line is stored unchanged;
    the list is expected to alternate an image path with that image's file
    name, so sample ``idx`` lives at ``data[idx * 2]`` and its name at
    ``data[idx * 2 + 1]``.

    Args:
        txtpath: Path of the list file to read.
        mode: One of 'train', 'val' or 'test'.
        transform: Optional callable applied to the RGB PIL image. The
            returned object must provide ``astype`` because ``__getitem__``
            casts it to float32.
    """

    def __init__(self, txtpath, mode='train', transform=None):
        # The double underscores are required: a method named `init` is never
        # invoked on construction, which would leave the instance without
        # `mode`, `transform` and `data`.
        super().__init__()
        assert mode in ['train', 'val', 'test'], "mode is one of ['train', 'val', 'test']"
        self.mode = mode
        self.transform = transform
        self.data = []
        with open(txtpath, 'r') as f:
            for line in f:
                line = line.strip()
                if mode != 'test':
                    fields = line.split('\t')
                    self.data.append([fields[0], fields[1]])
                else:
                    # Test lines carry no label; keep the raw entry.
                    self.data.append(line)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for train/val, or the image only for test."""
        if self.mode != 'test':
            img = Image.open(self.data[idx][0]).convert('RGB')
            label = self.data[idx][1]
            if self.transform:
                img = self.transform(img)
            return img.astype('float32'), np.array(label, dtype='int64')

        # Test entries are interleaved path/name, so paths sit at even indices.
        img = Image.open(self.data[idx * 2]).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img.astype('float32')

    def __len__(self):
        """Return the number of samples (half the stored entries in test mode)."""
        if self.mode != 'test':
            return len(self.data)
        # Two stored entries (path and name) per test image.
        return len(self.data) // 2
数据批大小
batch_size = 30  # lower this value when memory is insufficient
尺寸
size = 48  # tunable
transform
# Training pipeline: resize, random rotation and colour jitter for
# augmentation, centre crop to `size`, then tensor conversion and per-channel
# normalisation.
train_transform = T.Compose([
    T.Resize(size=size),
    T.RandomRotation(degrees=30, interpolation='bilinear'),
    T.ColorJitter(0.3, 0.3, 0.3, 0.3),
    T.CenterCrop(size=size),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Evaluation pipeline: same resize, crop and normalisation, without the
# random augmentation steps.
eval_transform = T.Compose([
    T.Resize(size=size),
    T.CenterCrop(size=size),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
dataset
# One dataset per split; only the training split uses the augmenting transform.
train_dataset = CatDataset(txtpath='/kaggle/working/train.txt', mode='train', transform=train_transform)
val_dataset = CatDataset(txtpath='/kaggle/working/val.txt', mode='val', transform=eval_transform)
test_dataset = CatDataset(txtpath='/kaggle/working/test.txt', mode='test', transform=eval_transform)
dataloader
# Only the training loader shuffles; validation and test keep file order so
# predictions can be matched back to the stored image names.
train_dataloader = paddle.io.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = paddle.io.DataLoader(dataset=val_dataset, batch_size=batch_size)
test_dataloader = paddle.io.DataLoader(dataset=test_dataset, batch_size=batch_size)

# len() of a loader is its number of batches, not its number of samples.
print('train_dataloader len:', len(train_dataloader))
print('val_dataloader len:', len(val_dataloader))
print('test_dataloader len:', len(test_dataloader))
分类类别
# Number of output classes; passed to resnet50 when the network is built.
num_classes = 12
模型
# ResNet-50 from paddle.vision configured for `num_classes` outputs.
model = paddle.vision.models.resnet50(num_classes=num_classes)
优化器
# Adam over all network parameters; the optimizer can be swapped for another one.
optimizer = paddle.optimizer.Adam(learning_rate=1e-4, parameters=model.parameters(), weight_decay=1e-5)
损失函数
# Cross-entropy loss, handed to model.prepare() as the training objective.
loss = paddle.nn.CrossEntropyLoss()
评价指标
# Accuracy metric, handed to model.prepare() for reporting.
acc = paddle.metric.Accuracy()
高层API封装
# Wrap the network in the high-level API. From here on `model` is the
# paddle.Model wrapper rather than the bare network.
model = paddle.Model(model)
model.prepare(optimizer, loss, acc)
打印模型结构
# NOTE: this rebinds the module-level `batch_size` and `size`. The transforms
# and data loaders built earlier already captured the previous values (30 and
# 48), so only the summary below uses these new values.
batch_size = 1
size = 224
model.summary((batch_size, 3, size, size))
训练轮数
# Number of training epochs passed to model.fit().
epochs = 2
模型训练
# Train on the training loader, evaluating on the validation loader, for
# `epochs` epochs with progress output enabled.
model.fit(train_dataloader, val_dataloader, epochs=epochs, verbose=1)
得到结果
# Run inference over the test loader.
# NOTE(review): per the paddle.Model.predict docs the return value is a list
# with one element per network output, each holding the per-batch arrays --
# confirm that the consumer indexes it accordingly.
results = model.predict(test_dataloader)
得到分类类别
# model.predict() returns one list per network output, and each of those holds
# one array per batch. Iterating `results` directly would treat a whole batch
# as a single sample and yield one class per batch, which makes the result
# list shorter than the name list. Walk the batches of the first (only)
# output and take the arg-max of every sample's scores.
new_results = []
for batch in results[0]:
    new_results.extend(np.argmax(sample) for sample in batch)

print('new_results len:', len(new_results), '; new_results[0] =', new_results[0])
得到图片名称
# Test entries are stored as interleaved path/name pairs, so the image names
# are the entries at odd indices.
names = test_dataset.data[1::2]
print('names len:', len(names))
保存结果
# Both columns must have the same length; pandas raises
# "All arrays must be of the same length" otherwise.
pd_results = pd.DataFrame({'names': names, 'results': new_results})
# Written without a header row and without the index column.
pd_results.to_csv('/kaggle/working/result.csv', header=False, index=False)
损失函数和正确率
# paddle.Model does not expose train_loss / train_acc / valid_loss / valid_acc
# attributes, so reading them raises AttributeError. Recompute the metrics
# with model.evaluate(), which returns a dict keyed by 'loss' and by the
# metric name ('acc' for the default paddle.metric.Accuracy).
# NOTE: the training loader applies random augmentation and shuffling, so the
# training figures are measured on augmented samples.
train_metrics = model.evaluate(train_dataloader, verbose=0)
val_metrics = model.evaluate(val_dataloader, verbose=0)
train_loss = train_metrics['loss']
train_acc = train_metrics['acc']
val_loss = val_metrics['loss']
val_acc = val_metrics['acc']
打印损失函数和正确率
# Report the final loss and accuracy for both splits.
print("train_loss:", train_loss)
print("train_acc:", train_acc)
print("val_loss:", val_loss)
print("val_acc:", val_acc)
原文地址: https://www.cveoy.top/t/topic/hfbW 著作权归作者所有。请勿转载和采集!