LeNet5 人脸识别模型训练与测试 - MindSpore 实践
使用 MindSpore 训练 LeNet5 模型并进行人脸识别
本教程将使用 MindSpore 框架训练 LeNet5 模型,并使用训练好的模型进行人脸识别。教程涵盖数据预处理、模型构建、训练和测试等步骤,并提供代码示例。
1. 导入必要的库
import numpy as np
import mindspore.dataset as ds
import os
import cv2
from mindspore import Tensor
import mindspore.nn as nn
from mindspore.common.initializer import Normal
from mindspore import context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
np.random.seed(58)  # Seed NumPy's global random number generator with a fixed value.
2. 定义 LeNet5 网络
class LeNet5(nn.Cell):
    """LeNet-5 style convolutional network for square image inputs.

    Two 5x5 'valid' convolutions, each followed by ReLU and 2x2 max pooling,
    optionally followed by a three-layer dense classifier head.

    Args:
        num_class (int): Number of output classes. Default: 34.
        num_channel (int): Number of input channels. Default: 3.
        include_top (bool): Whether to append the flatten + dense layers.
            Default: True.
        input_size (int): Height and width of the (square) input image; used
            to size the first dense layer. Default: 40.

    Returns:
        Tensor. Class logits when ``include_top`` is True, otherwise the
        16-channel feature map produced by the second pooling step.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution/pooling stages.

    Examples:
        >>> net = LeNet5(num_class=34)
    """

    def __init__(self, num_class=34, num_channel=3, include_top=True, input_size=40):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')
        self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        # Whether to include the dense (fully connected) layers.
        self.include_top = include_top
        if self.include_top:
            # Each stage is a 5x5 'valid' conv (size - 4) followed by a 2x2,
            # stride-2 pool (size // 2). For a 40x40 input: 40 -> 36 -> 18 ->
            # 14 -> 7, so the flattened feature length is 16 * 7 * 7 = 784.
            side = ((input_size - 4) // 2 - 4) // 2
            if side < 1:
                raise ValueError(
                    'input_size=%r is too small for LeNet5; it must be at least 16' % (input_size,))
            flat_features = 16 * side * side
            self.flatten = nn.Flatten()
            self.fc1 = nn.Dense(flat_features, 120, weight_init=Normal(0.02))
            self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
            self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))

    def construct(self, x):
        """Run the forward pass on a batch of images."""
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        if not self.include_top:
            return x
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
3. 定义数据集生成器
class TrainDatasetGenerator:
    """Random-access image source for ``mindspore.dataset.GeneratorDataset``.

    Every entry of ``file_path`` is treated as an image. The class label is
    taken from the first character of the file name (a digit), minus one.

    Args:
        file_path (str): Directory that contains the training images.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        # os.listdir() returns entries in arbitrary order; sort them so that
        # index -> file mapping is reproducible between runs.
        self.img_names = sorted(os.listdir(file_path))

    def __getitem__(self, index):
        """Return ``(data, label)`` for the image at ``index``.

        ``data`` is a float32 array of shape (3, 40, 40) scaled to [0, 1];
        ``label`` is a Python int.

        Raises:
            ValueError: If the file cannot be decoded as an image.
        """
        img_name = self.img_names[index]
        img_path = os.path.join(self.file_path, img_name)
        data = cv2.imread(img_path)
        if data is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('cannot read image file: %s' % img_path)
        data = cv2.resize(data, (40, 40))  # Bring every image to a common size.
        # NOTE(review): only the first character of the name is parsed, so at
        # most nine distinct labels can be produced -- confirm the naming
        # scheme if more classes are expected.
        label = int(img_name[0]) - 1
        # HWC -> CHW, float32, scaled to [0, 1]. No batch axis is added here:
        # the dataset's batch() operation adds it, and adding one per sample
        # would yield 5-D batches that Conv2d rejects.
        data = data.transpose((2, 0, 1)).astype(np.float32) / 255.
        return data, label

    def __len__(self):
        """Return the number of files found in the directory."""
        return len(self.img_names)
4. 训练 LeNet5 模型
def train_lenet(data_dir='E:/program/miniconda/envs/MindSpore/exam',
                ckpt_dir='E:/program/miniconda/envs/MindSpore/ckpt/',
                epoch_size=10, batch_size=4, num_class=34):
    """Train LeNet5 on the images in ``data_dir`` and save checkpoints.

    Args:
        data_dir (str): Directory with the training images.
        ckpt_dir (str): Directory that receives the checkpoint files.
        epoch_size (int): Number of training epochs. Default: 10.
        batch_size (int): Samples per batch; incomplete batches are dropped.
            Default: 4.
        num_class (int): Number of output classes of the network. Default: 34.

    Returns:
        LeNet5: The trained network, so callers can run inference with it.

    Raises:
        ValueError: If ``data_dir`` holds fewer than ``batch_size`` images,
            which would leave nothing to train on.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

    train_dataset_generator = TrainDatasetGenerator(data_dir)
    # shuffle=True already shuffles the samples, so no additional
    # shuffle() stage is chained behind it.
    ds_train = ds.GeneratorDataset(train_dataset_generator, ['data', 'label'], shuffle=True)
    ds_train = ds_train.batch(batch_size=batch_size, drop_remainder=True)

    steps_per_epoch = ds_train.get_dataset_size()
    if steps_per_epoch == 0:
        raise ValueError('not enough images in %s to form one batch of %d' % (data_dir, batch_size))

    network = LeNet5(num_class=num_class)
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_opt = nn.RMSProp(network.trainable_params(), learning_rate=0.001, decay=0.99,
                         epsilon=1e-08, momentum=0.9)

    time_cb = TimeMonitor(data_size=steps_per_epoch)
    # Save one checkpoint per epoch and keep at most the ten latest.
    config_ck = CheckpointConfig(save_checkpoint_steps=steps_per_epoch,
                                 keep_checkpoint_max=10)
    ckpoint_cb = ModelCheckpoint(prefix='checkpoint_lenet', directory=ckpt_dir, config=config_ck)

    # Bundle network, loss and optimizer into a trainable model.
    model = Model(network, net_loss, net_opt, metrics={'Accuracy': Accuracy()})
    # Dataset sink mode is a device-side feature; per the MindSpore docs
    # training on CPU runs without sinking, so request that explicitly.
    model.train(epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()],
                dataset_sink_mode=False)
    return network
5. 使用训练好的模型进行人脸识别
def recognize_faces(network, cascade_path='haarcascade_frontalface_alt.xml',
                    threshold=0.95, camera_index=0, subjects=None):
    """Detect faces from a camera stream and label them with ``network``.

    Frames are read from the camera, faces are located with a Haar cascade,
    and each face crop is preprocessed the same way as the training images
    (resized to 40x40, CHW, float32, scaled to [0, 1]) before being
    classified. Press ``q`` in the preview window to stop.

    Args:
        network: A trained classifier that maps a (1, 3, 40, 40) Tensor to
            class logits of shape (1, num_class).
        cascade_path (str): Path to the Haar cascade XML file.
        threshold (float): Minimum softmax probability required to accept a
            prediction; below it the face is labelled 'unknown'.
        camera_index (int): Index of the capture device. Default: 0.
        subjects (list[str] | None): Display name for each class index.
            Defaults to the strings '0' .. '33'.

    Raises:
        IOError: If the cascade file cannot be loaded or the camera cannot
            be opened.
    """
    if subjects is None:
        subjects = [str(i) for i in range(34)]

    face_cascade = cv2.CascadeClassifier(cascade_path)  # Load the detector.
    if face_cascade.empty():
        # A missing or invalid XML file yields an empty classifier rather
        # than an exception, so check for it explicitly.
        raise IOError('cannot load cascade classifier: %s' % cascade_path)

    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        cap.release()
        raise IOError('cannot open camera %r' % (camera_index,))

    try:
        while True:
            success, img = cap.read()
            if not success or img is None:
                break  # Camera disconnected or stream ended.
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Draw on a copy so the original frame stays untouched for cropping.
            img1 = img.copy()
            rects = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
                                                  minSize=(30, 30),
                                                  flags=cv2.CASCADE_SCALE_IMAGE)
            if len(rects) == 0:
                cv2.putText(img1, 'unrecognized', (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1,
                            (128, 128, 0), 2)
            for (x, y, w, h) in rects:
                # Rows span the height and columns span the width.
                face = cv2.resize(img[y:y + h, x:x + w], (40, 40))
                # HWC -> CHW with explicit axes, then add the batch axis the
                # network expects.
                face = face.transpose((2, 0, 1)).astype(np.float32) / 255.
                face = face[np.newaxis, ...]
                cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Face box.

                logits = network(Tensor(face)).asnumpy()[0]
                # The network outputs raw logits; convert them to
                # probabilities so the threshold is meaningful.
                exp_logits = np.exp(logits - np.max(logits))
                probs = exp_logits / exp_logits.sum()
                best = int(np.argmax(probs))
                if probs[best] < threshold or best >= len(subjects):
                    txt = 'unknown'
                else:
                    txt = subjects[best]
                cv2.putText(img1, txt, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
            cv2.imshow('img', img1)
            k = cv2.waitKey(10)
            if k & 0xFF == ord('q'):  # Quit when 'q' is pressed.
                break
    finally:
        cap.release()  # Free the capture device.
        cv2.destroyAllWindows()  # Close the preview window.
6. 运行代码
# Script entry point: start training only when the file is executed directly.
if "__main__" == __name__:
    train_lenet()
错误解决
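如果训练时出现输入维度不匹配的错误,原因如下:LeNet5 网络的第一个卷积层要求输入是 4 维张量 (N, C, H, W),而 TrainDatasetGenerator 的 __getitem__ 方法在转置之后又通过 data[np.newaxis, ...] 额外添加了一维,使单个样本变为 (1, 3, 40, 40);经过数据集的 batch 操作后,送入网络的数据就成了 5 维张量。batch 操作会自动添加 batch 维度,因此 __getitem__ 只需返回形状为 (3, 40, 40) 的数组,需要将多余的这一维去掉。修改后的代码如下: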
class TrainDatasetGenerator:
    """Random-access image source for ``mindspore.dataset.GeneratorDataset``.

    Each sample is a (3, 40, 40) float32 array scaled to [0, 1]; its label is
    the first character of the file name (a digit), minus one.

    Args:
        file_path (str): Directory that contains the training images.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        # Sort because os.listdir() makes no ordering guarantee.
        self.img_names = sorted(os.listdir(file_path))

    def __getitem__(self, index):
        """Return ``(data, label)`` for the image at ``index``.

        Raises:
            ValueError: If the file cannot be decoded as an image.
        """
        img_name = self.img_names[index]
        img_path = os.path.join(self.file_path, img_name)
        data = cv2.imread(img_path)
        if data is None:
            # cv2.imread returns None on failure instead of raising.
            raise ValueError('cannot read image file: %s' % img_path)
        data = cv2.resize(data, (40, 40))
        label = int(img_name[0]) - 1
        # HWC -> CHW only. The extra leading axis has been removed: batch()
        # supplies the batch dimension, so each sample must stay 3-D.
        data = data.transpose((2, 0, 1)).astype(np.float32) / 255.
        return data, label

    def __len__(self):
        """Return the number of files found in the directory."""
        return len(self.img_names)
总结
本教程介绍了使用 MindSpore 框架训练 LeNet5 模型并进行人脸识别的步骤,并提供了相应的代码示例。您可以根据自己的需求修改代码,例如调整模型参数、数据集、训练参数等。
原文地址: https://www.cveoy.top/t/topic/jrDz 著作权归作者所有。请勿转载和采集!