MindSpore LeNet5 人脸识别模型训练及实时识别
import numpy as np
import mindspore.dataset as ds
import os
import cv2
from mindspore import Tensor
import mindspore.nn as nn
from mindspore.common.initializer import Normal
from mindspore import context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(58)
class LeNet5(nn.Cell):
    """LeNet-5 style convolutional network.

    Two 5x5 'valid' convolutions, each followed by ReLU and 2x2 max pooling,
    feed an optional three-layer fully connected head. The head's input size
    (784) is hard-coded for 40x40 inputs: 40 -> 36 -> 18 -> 14 -> 7 spatially,
    with 16 channels, i.e. 16 * 7 * 7 = 784 features.

    Args:
        num_class (int): Number of output classes. Default: 34.
        num_channel (int): Number of input channels. Default: 3.
        include_top (bool): Whether to build and apply the fully connected
            head. Default: True.

    Returns:
        Tensor, the output of the last dense layer (one value per class) when
        ``include_top`` is True, otherwise the feature map produced by the
        second pooling stage.

    Examples:
        >>> LeNet5(num_class=34)
    """

    def __init__(self, num_class=34, num_channel=3, include_top=True):
        super().__init__()
        self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid')
        self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid')
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
        # Whether to include the dense (fully connected) layers.
        self.include_top = include_top
        if self.include_top:
            self.flatten = nn.Flatten()
            # 784 = 16 channels * 7 * 7, valid only for 40x40 inputs.
            self.fc1 = nn.Dense(784, 120, weight_init=Normal(0.02))
            self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
            self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))

    def construct(self, x):
        """Run the forward pass on a batch ``x``."""
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        if not self.include_top:
            # Feature-extractor mode: skip the classifier head.
            return x
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # No softmax here: the last layer's raw output is returned.
        x = self.fc3(x)
        return x
class TrainDatasetGenerator:
    """Random-access image source, passed to ``ds.GeneratorDataset`` in this file.

    Every entry of ``file_path`` is treated as one training image. The class
    label is derived from the first character of the file name, which must be
    a digit: a name starting with ``1`` yields label 0, ``2`` yields 1, etc.

    Args:
        file_path (str): Directory that contains the training images.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        # os.listdir() returns names in arbitrary order; sort them so that a
        # given index always maps to the same file.
        self.img_names = sorted(os.listdir(file_path))

    def __getitem__(self, index):
        """Load and preprocess the sample at ``index``.

        Returns:
            tuple: ``(data, label)`` where ``data`` is a float32 array laid
            out channel-first at 40x40 and divided by 255, and ``label`` is
            an int.

        Raises:
            ValueError: If the file cannot be read as an image, or if the
                file name does not start with a digit.
        """
        img_name = self.img_names[index]
        img_path = os.path.join(self.file_path, img_name)
        data = cv2.imread(img_path)
        if data is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with a clear message rather than inside cv2.resize.
            raise ValueError('cannot read image file: %s' % img_path)
        data = cv2.resize(data, (40, 40))  # bring every image to the same size
        # NOTE(review): only the first character is parsed, so labels above 8
        # can never be produced although the network is built with 34 classes
        # -- confirm the file naming scheme.
        label = int(img_name[0]) - 1
        # HWC -> CHW, then scale by 1/255.
        data = data.transpose((2, 0, 1)).astype(np.float32) / 255.
        return data, label

    def __len__(self):
        """Return the number of entries found in ``file_path``."""
        return len(self.img_names)
def _softmax(logits):
    """Convert a 1-D array of logits into probabilities that sum to 1."""
    # Subtracting the maximum keeps np.exp from overflowing.
    exp = np.exp(logits - np.max(logits))
    return exp / np.sum(exp)


def _preprocess_face(face_bgr):
    """Turn a cropped face image into a batch-of-one Tensor.

    Uses the same steps as TrainDatasetGenerator.__getitem__ (resize to 40x40,
    HWC -> CHW, scale by 1/255) so inference sees the layout used in training.
    """
    face = cv2.resize(face_bgr, (40, 40))
    face = face.transpose((2, 0, 1)).astype(np.float32) / 255.
    return Tensor(np.expand_dims(face, axis=0))  # add the batch dimension


def train_lenet():
    """Train LeNet5 on the face images, then run live webcam recognition.

    Training reads images through TrainDatasetGenerator, runs 10 epochs with
    RMSProp and sparse softmax cross-entropy, and writes checkpoints. After
    training, frames are read from camera 0, faces are located with a Haar
    cascade, and each face is classified by the trained network. A prediction
    whose softmax probability is below the threshold is shown as 'unknown'.
    The loop ends when 'q' is pressed or a frame cannot be read.

    Raises:
        RuntimeError: If the Haar cascade file cannot be loaded.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

    # ---- training ----
    train_dataset_generator = TrainDatasetGenerator('E:/program/miniconda/envs/MindSpore/exam')
    ds_train = ds.GeneratorDataset(train_dataset_generator, ['data', 'label'], shuffle=True)
    ds_train = ds_train.shuffle(buffer_size=5)
    ds_train = ds_train.batch(batch_size=4, drop_remainder=True)

    network = LeNet5(num_class=34)
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_opt = nn.RMSProp(network.trainable_params(), learning_rate=0.001, decay=0.99, epsilon=1e-08, momentum=0.9)

    steps_per_epoch = ds_train.get_dataset_size()
    time_cb = TimeMonitor(data_size=steps_per_epoch)
    # Save one checkpoint per epoch, keeping at most the 10 most recent.
    config_ck = CheckpointConfig(save_checkpoint_steps=steps_per_epoch,
                                 keep_checkpoint_max=10)
    config_ckpt_path = 'E:/program/miniconda/envs/MindSpore/ckpt/'
    ckpoint_cb = ModelCheckpoint(prefix='checkpoint_lenet', directory=config_ckpt_path, config=config_ck)

    # Bundle network, loss and optimizer for training.
    model = Model(network, net_loss, net_opt, metrics={'Accuracy': Accuracy()})
    epoch_size = 10
    model.train(epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()], dataset_sink_mode=True)

    # ---- live recognition ----
    network.set_train(False)
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')  # load the detector
    if face_cascade.empty():
        raise RuntimeError("failed to load 'haarcascade_frontalface_alt.xml'")
    threshold = 0.95  # minimum softmax probability to accept a prediction
    # Display names '0' .. '33', indexed by predicted class.
    subjects = [str(i) for i in range(34)]

    cap = cv2.VideoCapture(0)
    try:
        while True:
            success, img = cap.read()
            if not success:
                # No frame available (camera missing or stream ended).
                break
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Draw on a copy so the original frame stays untouched for cropping.
            img1 = img.copy()
            rect = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30),
                                                 flags=cv2.CASCADE_SCALE_IMAGE)
            if len(rect) == 0:
                cv2.putText(img1, 'unrecognized', (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
            for (x, y, w, h) in rect:
                # Rows span the height and columns span the width.
                face = img[y:y + h, x:x + w]
                cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)  # draw the bounding box
                output = network(_preprocess_face(face))
                # The network returns logits; convert to probabilities before
                # comparing with the threshold.
                probs = _softmax(output.asnumpy()[0])
                predicted_class = int(np.argmax(probs))
                if probs[predicted_class] < threshold:
                    txt = 'unknown'
                else:
                    txt = subjects[predicted_class]
                cv2.putText(img1, txt, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
            cv2.imshow('img', img1)
            k = cv2.waitKey(10)
            if k & 0xFF == ord('q'):  # pressing q ends the program
                break
    finally:
        # Always give the camera back and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
# Script entry point: only runs when this file is executed directly.
if __name__ == "__main__":
    train_lenet()
提高识别准确率的方法:1. 数据增强:可以通过随机旋转、平移、裁剪、缩放、翻转等数据增强方式,增加训练数据的多样性,提高模型的泛化能力。
2. 调整模型结构:可以尝试调整模型结构,增加或减少网络层数,增加或减少神经元数量等。
3. 调整超参数:可以通过调整学习率、正则化系数、批量大小等超参数,优化模型的训练过程。
4. 使用预训练模型:可以使用已经训练好的模型,在此基础上进行微调,以提高模型的准确率。
5. 融合多个模型:可以将多个模型的预测结果进行融合,以提高识别准确率。比如可以使用投票法、加权平均法等。
原文地址: https://www.cveoy.top/t/topic/jrEU 著作权归作者所有。请勿转载和采集!