MindSpore ResNet 实现数字识别 - 代码示例和优化
import numpy as np
import mindspore.dataset as ds
import os
import cv2
import mindspore
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.initializer import Normal
from mindspore import context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
from scipy.integrate._ivp.radau import P
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(58)
class BasicBlock(nn.Cell):
    """Residual block made of two 3x3 convolutions plus a shortcut branch.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution. Defaults to 1.
        downsample: Optional cell applied to the block input to build the
            shortcut branch. When ``None`` the input is used unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Imported here so the block does not depend on the module-level
        # name ``P``, which the file binds to a SciPy object rather than to
        # MindSpore's operator namespace.
        from mindspore import ops

        # An explicit ``padding`` is only accepted together with
        # pad_mode='pad'; with the default 'same' mode MindSpore raises
        # ValueError for a non-zero padding.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, pad_mode='pad', padding=1,
                               has_bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, pad_mode='pad', padding=1,
                               has_bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.add = ops.Add()

    def construct(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # Project the input when a shortcut cell was supplied.
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.add(out, identity)
        out = self.relu(out)
        return out
class ResNet(nn.Cell):
    """ResNet-style classifier for single-channel images.

    The stem is a 7x7 stride-2 convolution followed by a 3x3 stride-2 max
    pool. Four residual stages with 64, 128, 256 and 512 channels follow,
    then a global spatial average and a dense layer.

    Args:
        block: Residual block class, called as
            ``block(in_channels, out_channels, stride, downsample)``.
        layers: Sequence of four integers giving the number of blocks in
            each of the four stages.
        num_classes: Size of the output layer. Defaults to 10.
    """

    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        from mindspore import ops

        # Channel count fed to the next stage; updated by make_layer().
        self.in_channels = 64
        # padding=3 requires pad_mode='pad' (the default 'same' mode rejects
        # a non-zero padding).
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                               pad_mode='pad', has_bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same')
        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)
        # Global average over the spatial axes. A fixed 10x10 pooling window
        # only yields the 512 features expected by ``fc`` when the last
        # feature map is exactly 10x10; the mean works for any input size.
        self.avgpool = ops.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.fc = nn.Dense(512, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 convolution + batch-norm shortcut.
        Updates ``self.in_channels`` to ``out_channels`` as a side effect.

        Returns:
            An ``nn.SequentialCell`` containing the blocks in order.
        """
        downsample = None
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.SequentialCell([
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1,
                          stride=stride, has_bias=False),
                nn.BatchNorm2d(out_channels),
            ])

        layers = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.SequentialCell(layers)

    def construct(self, x):
        """Run the network on ``x`` and return the output of the dense layer."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Axes 2 and 3 are the spatial axes of the default NCHW layout.
        x = self.avgpool(x, (2, 3))
        x = self.flatten(x)
        x = self.fc(x)
        return x
class TrainDatasetGenerator:
    """Random-access image source for ``GeneratorDataset``.

    Every entry of ``file_path`` is treated as an image file. The label is
    derived from the first character of the file name, which must be a
    digit: label = digit - 1.

    Args:
        file_path: Directory containing the image files.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        # os.listdir() returns entries in arbitrary order; sort them so that
        # an index always refers to the same file.
        self.img_names = sorted(os.listdir(file_path))

    def __getitem__(self, index):
        """Return ``(data, label)`` for the image at ``index``.

        ``data`` is a float32 array of shape (1, H, W) scaled to [0, 1];
        ``label`` is a zero-dimensional int32 array.

        Raises:
            ValueError: If the file cannot be decoded as an image, or its
                name does not start with a digit.
        """
        name = self.img_names[index]
        path = os.path.join(self.file_path, name)
        # Read as a single channel: the network's first convolution takes
        # one input channel, while the default imread mode yields three.
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None.
            raise ValueError('Cannot read image file: {}'.format(path))
        label = np.array(int(name[0]) - 1, dtype=np.int32)
        # Add the leading channel axis: (H, W) -> (1, H, W).
        data = image[np.newaxis, ...].astype(np.float32) / 255.
        return data, label

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_names)
def train_lenet(train_dir='D:/pythonProject7/train',
                valid_dir='D:/pythonProject7/test',
                ckpt_dir='D:/code/machine vision course/digit-mindspore/ckpt/',
                rounds=3,
                epochs_per_round=10):
    """Train the ResNet classifier on CPU and evaluate it after each round.

    Args:
        train_dir: Directory with the training images.
        valid_dir: Directory with the validation images.
        ckpt_dir: Directory where checkpoints are written.
        rounds: Number of train-then-evaluate rounds. Defaults to 3.
        epochs_per_round: Epochs trained in each round. Defaults to 10.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

    ds_train = ds.GeneratorDataset(TrainDatasetGenerator(train_dir),
                                   ['data', 'label'], shuffle=True)
    ds_train = ds_train.shuffle(buffer_size=10)
    ds_train = ds_train.batch(batch_size=4, drop_remainder=True)

    # Keep validation order fixed: with drop_remainder=True a shuffled order
    # would change which samples are evaluated from run to run.
    ds_valid = ds.GeneratorDataset(TrainDatasetGenerator(valid_dir),
                                   ['data', 'label'], shuffle=False)
    ds_valid = ds_valid.batch(batch_size=4, drop_remainder=True)

    network = ResNet(BasicBlock, [2, 2, 2, 2])
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.01,
                          momentum=0.9)

    time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
    config_ck = CheckpointConfig(save_checkpoint_steps=10,
                                 keep_checkpoint_max=10)
    ckpoint_cb = ModelCheckpoint(prefix='checkpoint_lenet',
                                 directory=ckpt_dir, config=config_ck)

    model = Model(network, net_loss, net_opt, metrics={'Accuracy': Accuracy()})

    # Each round continues training the same model, then reports the
    # validation metrics.
    for _ in range(rounds):
        print('============== Starting Training =============')
        model.train(epochs_per_round, ds_train,
                    callbacks=[time_cb, ckpoint_cb, LossMonitor()])
        acc = model.eval(ds_valid)
        print('============== {} ============='.format(acc))
# Start training only when this file is executed as a script.
if __name__ == "__main__":
    train_lenet()
代码优化和解释:
- 修改 `padding` 和 `pad_mode`: 在 `ResNet` 的构造函数中,将 `self.conv1` 的 `padding` 设置为 3,`pad_mode` 设置为 `'pad'`。这样做是为了解决第一次卷积操作时抛出的 `ValueError` 异常:`pad_mode` 的默认值为 `'same'`,而 `padding` 不为 0,两者发生冲突。
- 数据预处理: 在 `TrainDatasetGenerator` 类中,对图像数据进行了预处理,包括:
  - 将图像数据类型转换为 `np.float32` 并除以 255,将其归一化到 0-1 之间。
  - 使用 `transpose` 方法调整图像的维度,以便与 MindSpore 的 `Conv2d` 操作兼容。
- 训练和评估: 代码中使用 `Model` 类进行模型训练和评估,并使用以下回调函数:
  - `TimeMonitor`:监控训练过程的时间。
  - `ModelCheckpoint`:保存训练过程中的模型。
  - `LossMonitor`:监控训练过程中的损失值。
- 代码注释: 代码中添加了详细的注释,解释了每个代码段的作用。
示例的用途:
该代码示例演示了如何使用 MindSpore 构建 ResNet 模型进行数字识别。您可以将该代码作为起点,学习如何使用 MindSpore 进行深度学习模型开发,并进一步扩展该示例以实现更复杂的功能。
如何使用:
- 安装 MindSpore: `pip install mindspore`
- 将代码保存为 `.py` 文件。
- 修改代码中的 `train_dataset_generator` 和 `valid_dataset_generator` 的文件路径,指向您的训练数据和验证数据所在目录。
- 运行代码,开始训练模型。
注意:
- 此代码仅使用 CPU 进行训练,如果您有 GPU,可以修改 `context.set_context` 中的 `device_target` 参数以使用 GPU。
- 您可以根据自己的需求调整训练参数,例如 `learning_rate`、`batch_size` 和 `epoch_size`。
- 此代码示例仅是一个简单的数字识别模型,您可以根据自己的任务,使用不同的数据集和模型结构进行改进。
原文地址: https://www.cveoy.top/t/topic/mQoD 著作权归作者所有。请勿转载和采集!