错误提示为"All arrays must be of the same length",意思是所有的数组必须拥有相同的长度。在这里,出现这个错误是因为names和new_results数组的长度不一样。

修改代码如下:

  1. 在构造测试集数据列表时,将图片名称也加入到test_datas数组中;
  2. 修改构造数据集中,如果是测试集的情况,只将图片的绝对路径添加到data数组中,不再添加标签。

修改后的代码如下所示:

import os

import numpy as np
import pandas as pd
import paddle
import paddle.vision.transforms as T
from PIL import Image

数据列表写入.txt文件

def data_to_txt(datas, save_path):
    """Write every entry of *datas* to *save_path*, one entry per line."""
    with open(save_path, 'w') as out:
        for entry in datas:
            out.write(f'{entry}\n')

构造带标签的数据列表

# Build the labelled list: each line of train_list.txt is
# "relative_path<TAB>label"; prepend the dataset root to the whole line.
datas_with_label = []
with open('/kaggle/input/sxcattxt/train_list.txt', 'r') as f:
    for raw in f.readlines():
        raw = raw.strip()
        datas_with_label.append(f'/kaggle/input/shisuncat/cat_12_train/{raw}')

打乱带标签的数据列表

# Shuffle in place before slicing, so the train/val split below is random.
np.random.shuffle(datas_with_label)

按照8:2划分训练集和验证集

# Split 80/20 into train/val: the first 20% of the shuffled list becomes the
# validation set. The original `// 102` was a markdown-garbled `// 10 * 2`
# (it would have produced a ~1% validation set instead of the intended 8:2).
val_size = len(datas_with_label) // 10 * 2
train_datas = datas_with_label[val_size:]
val_datas = datas_with_label[:val_size]
print('train_datas len:', len(train_datas))
print('val_datas len:', len(val_datas))

写入train.txt、val.txt文件

# Persist both split lists so CatDataset can read them back from disk.
data_to_txt(train_datas, '/kaggle/working/train.txt')
data_to_txt(val_datas, '/kaggle/working/val.txt')

构造测试集数据列表

# Build the test list: for every image append its absolute path followed by
# its bare file name, so entries alternate [path, name, path, name, ...].
# CatDataset and the result-saving code below both rely on this layout.
test_datas = []
test_dir = '/kaggle/input/shisuncat/cat_12_test/cat_12_test'
for fname in os.listdir(test_dir):
    test_datas.append(os.path.join(test_dir, fname))
    test_datas.append(fname)
print('test_datas len:', len(test_datas))

写入test.txt

# Write the alternating [path, name, path, name, ...] test list to disk.
data_to_txt(test_datas, '/kaggle/working/test.txt')

构造数据集

class CatDataset(paddle.io.Dataset):
    """Image dataset read from a list file written by ``data_to_txt``.

    In 'train'/'val' mode each line of *txtpath* is "image_path<TAB>label".
    In 'test' mode the file alternates path and file-name lines, i.e.
    [path0, name0, path1, name1, ...]; only the paths are loaded as samples.

    Note: the markdown-mangled original had ``init`` / ``.init()`` — the
    dunder underscores are restored here, and the methods are re-indented
    back into the class body.
    """

    def __init__(self, txtpath, mode='train', transform=None):
        super(CatDataset, self).__init__()

        assert mode in ['train', 'val', 'test'], "mode is one of ['train', 'val', 'test']"
        self.mode = mode
        self.transform = transform
        self.data = []

        with open(txtpath, 'r') as f:
            for line in f.readlines():
                line = line.strip()
                if mode != 'test':
                    # "path<TAB>label" -> [path, label]
                    self.data.append([line.split('\t')[0], line.split('\t')[1]])
                else:
                    # Keep both the path line and the name line; consumers
                    # index with stride 2 (paths at even, names at odd slots).
                    self.data.append(line)

    def __getitem__(self, idx):
        """Return (image, label) in train/val mode, or just the image in test mode."""
        if self.mode != 'test':
            img = Image.open(self.data[idx][0]).convert('RGB')
            label = self.data[idx][1]
            if self.transform:
                img = self.transform(img)
            # label is a string; numpy casts it to int64 here.
            return img.astype('float32'), np.array(label, dtype='int64')
        else:
            # Even positions of self.data hold the image paths.
            img = Image.open(self.data[idx * 2]).convert('RGB')
            if self.transform:
                img = self.transform(img)
            return img.astype('float32')

    def __len__(self):
        if self.mode != 'test':
            return len(self.data)
        else:
            # Two stored lines (path + name) per test sample.
            return len(self.data) // 2

数据批大小

batch_size = 30 # reduce this when memory is insufficient

尺寸

# NOTE(review): the transforms below resize/crop to this size (48), but the
# later model.summary call uses a 224x224 input — confirm which is intended.
size = 48 # tunable

transform

# Training transform: light augmentation, then ImageNet-stat normalization.
train_transform = T.Compose([
    T.Resize(size=size),
    T.RandomRotation(degrees=30, interpolation='bilinear'),
    T.ColorJitter(0.3, 0.3, 0.3, 0.3),
    T.CenterCrop(size=size),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

# Eval transform: deterministic resize/crop plus the same normalization.
eval_transform = T.Compose([
    T.Resize(size=size),
    T.CenterCrop(size=size),
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

dataset

# Datasets built from the list files written above.
train_dataset = CatDataset(txtpath='/kaggle/working/train.txt', mode='train', transform=train_transform)
val_dataset = CatDataset(txtpath='/kaggle/working/val.txt', mode='val', transform=eval_transform)
test_dataset = CatDataset(txtpath='/kaggle/working/test.txt', mode='test', transform=eval_transform)

dataloader

# Only the training loader shuffles; val/test keep file order.
train_dataloader = paddle.io.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = paddle.io.DataLoader(dataset=val_dataset, batch_size=batch_size)
test_dataloader = paddle.io.DataLoader(dataset=test_dataset, batch_size=batch_size)

print

# Sanity-check the number of batches per loader.
print('train_dataloader len:', len(train_dataloader))
print('val_dataloader len:', len(val_dataloader))
print('test_dataloader len:', len(test_dataloader))

分类类别

# Twelve cat categories in this competition dataset.
num_classes = 12

模型

# ResNet-50 backbone with a fresh 12-way classification head.
model = paddle.vision.models.resnet50(num_classes=num_classes)

优化器

optimizer = paddle.optimizer.Adam(learning_rate=1e-4, parameters=model.parameters(), weight_decay=1e-5) # the optimizer can be swapped out

损失函数

# Cross-entropy over the 12 class logits.
loss = paddle.nn.CrossEntropyLoss()

评价指标

# Top-1 accuracy metric reported during fit/evaluate.
acc = paddle.metric.Accuracy()

高层API封装

# Wrap in the high-level API and attach optimizer, loss and metric.
model = paddle.Model(model)
model.prepare(optimizer, loss, acc)

打印模型结构

# Use dedicated names for the summary's dummy input shape so the
# batch_size/size globals used to build the loaders/transforms above are not
# silently clobbered.
# NOTE(review): the summary uses a 224x224 input while the transforms above
# produce `size`x`size` (48) images — confirm which input size is intended.
summary_batch_size = 1
summary_size = 224
model.summary((summary_batch_size, 3, summary_size, summary_size))

训练轮数

# Kept small for a quick demonstration run; increase for real training.
epochs = 2

模型训练

# Train with per-epoch evaluation; val_dataloader is the eval data
# (passed positionally as the second argument).
model.fit(train_dataloader, val_dataloader, epochs=epochs, verbose=1)

得到结果

# Run inference on the test loader. The loop below treats the result as
# nested [batches][samples] arrays of logits — presumably the structure
# paddle.Model.predict returns; verify against the paddle docs.
results = model.predict(test_dataloader)

得到分类类别

# Reduce each sample's logits to its predicted class index.
new_results = [np.argmax(sample) for batch in results for sample in batch]

print('new_results len:', len(new_results), '; new_results[0] =', new_results[0])

得到图片名称

# Recover the bare file names: test_dataset.data alternates
# [path, name, path, name, ...], so names sit at the odd positions.
names = [test_dataset.data[k * 2 + 1] for k in range(len(test_dataset.data) // 2)]
print('names len:', len(names))

保存结果

# Persist "<file name>,<predicted class>" rows without header or index,
# matching the submission format.
pd_results = pd.DataFrame({'names': names, 'results': new_results})
pd_results.to_csv('/kaggle/working/result.csv', header=False, index=False)

损失函数和正确率

# NOTE(review): nothing in this file shows paddle's high-level Model
# defining train_loss/train_acc/valid_loss/valid_acc attributes — confirm
# these exist on your paddle version before relying on them.
train_loss = model.train_loss
train_acc = model.train_acc
val_loss = model.valid_loss
val_acc = model.valid_acc

打印损失函数和正确率

# Report the collected training/validation statistics.
# Fix: the original's final print was missing its closing parenthesis,
# which is a SyntaxError.
print("train_loss:", train_loss)
print("train_acc:", train_acc)
print("val_loss:", val_loss)
print("val_acc:", val_acc)

以下源代码为什么会报上述错误?请给出修改后的代码。(提问中附带的原始源代码在转载时被压成一行并截断,与上文修改前的代码相同,此处从略。)

原文地址: https://www.cveoy.top/t/topic/hfbW 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录