使用MindSpore进行人脸识别
本代码使用MindSpore训练ResNet模型,并将其用于人脸识别。代码包含模型定义、数据集加载、训练和测试等部分,并展示了如何使用加载的模型进行实时人脸识别。
from collections import defaultdict, Counter
from mindspore.train.serialization import load_checkpoint, load_param_into_net
import numpy as np
import mindspore.dataset as ds
import cv2
import mindspore.nn as nn
import os
from mindspore import context, ops, Tensor
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
np.random.seed(58)
class ResidualBlock(nn.Cell):
    """Basic two-convolution residual block (ResNet-18/34 style)."""

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        # First conv may reduce spatial resolution via `stride`.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, pad_mode='same')
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        # Second conv always preserves the spatial size.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, pad_mode='same')
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Optional projection applied to the shortcut when shapes differ.
        self.downsample = downsample
        self.stride = stride

    def construct(self, x):
        """Return relu(F(x) + shortcut(x))."""
        shortcut = x
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        if self.downsample is not None:
            shortcut = self.downsample(x)
        y += shortcut
        return self.relu(y)
class ResNet(nn.Cell):
    """ResNet backbone built from `block`, classifying into `num_classes`."""

    def __init__(self, block, layers, num_classes=34):
        super(ResNet, self).__init__()
        self.in_channels = 64
        # Stem: 7x7 conv + BN + ReLU + 3x3 max-pool (both stride 2, 'valid').
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode='valid')
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='valid')
        # Four residual stages; stages 2-4 halve the spatial resolution.
        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)
        # NOTE(review): this fixed 3x3 'valid' average pool assumes the
        # feature map is exactly 3x3 here (true for 100x100 inputs) —
        # confirm before feeding other input sizes.
        self.avgpool = nn.AvgPool2d(kernel_size=3, stride=1, pad_mode='valid')
        self.fc = nn.Dense(512 * block.expansion, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Stack `blocks` residual blocks; only the first may downsample."""
        downsample = None
        needs_projection = (stride != 1
                            or self.in_channels != out_channels * block.expansion)
        if needs_projection:
            # 1x1 conv projection so the shortcut matches the block output.
            downsample = nn.SequentialCell([
                nn.Conv2d(self.in_channels, out_channels * block.expansion,
                          kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels * block.expansion),
            ])
        stage = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            stage.append(block(self.in_channels, out_channels))
        return nn.SequentialCell(stage)

    def construct(self, x):
        """Forward pass: stem -> 4 residual stages -> global pool -> logits."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer4(self.layer3(self.layer2(self.layer1(x))))
        x = self.avgpool(x)
        # Flatten to (batch, features) before the classifier head.
        x = ops.Reshape()(x, (ops.Shape()(x)[0], -1))
        return self.fc(x)
class TrainDatasetGenerator:
    """Yields (image, label) pairs from a directory of 'label-*.jpg' files.

    Each image is resized to 100x100, transposed to channels-first and
    scaled to [0, 1]; the label is the integer prefix of the file name
    (the text before the first '-').
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.img_names = os.listdir(file_path)

    def __getitem__(self, index=0):
        name = self.img_names[index]
        image = cv2.imread(os.path.join(self.file_path, name))
        image = cv2.resize(image, (100, 100))
        # .transpose() with no args reverses all axes: (H, W, C) -> (C, W, H).
        image = image.transpose().astype(np.float32) / 255.
        return image, int(name.split('-')[0])

    def __len__(self):
        return len(self.img_names)
def load_model_from_ckpt(ckpt_path='D:/pythonproject2/ckpt/checkpoint_resnet_1-20_49.ckpt'):
    """Build a ResNet-18-style network and load weights from a checkpoint.

    Args:
        ckpt_path: path to the .ckpt file to load. Defaults to the original
            hard-coded location so existing callers keep working.

    Returns:
        The ResNet network with the checkpoint parameters loaded into it.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    # [2, 2, 2, 2] basic blocks per stage is the ResNet-18 layout.
    network = ResNet(ResidualBlock, [2, 2, 2, 2])
    param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(network, param_dict)
    return network
def train_resnet():
    """Set up the model/optimizer, then run live webcam face recognition.

    NOTE(review): despite the name, the training calls below are commented
    out — as written this function only performs inference with the
    checkpoint loaded by `load_model_from_ckpt`.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    train_dataset_generator = TrainDatasetGenerator('D:/pythonproject2/digital_mindspore/dataset')
    ds_train = ds.GeneratorDataset(train_dataset_generator, ['data', 'label'], shuffle=True)
    ds_train = ds_train.shuffle(buffer_size=10)
    ds_train = ds_train.batch(batch_size=4, drop_remainder=True)
    network = load_model_from_ckpt()
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.001, momentum=0.9)
    #time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
    #config_ck = CheckpointConfig(save_checkpoint_steps=10, keep_checkpoint_max=10)
    #config_ckpt_path = 'D:/pythonproject2/ckpt/'
    #ckpoint_cb = ModelCheckpoint(prefix='checkpoint_resnet', directory=config_ckpt_path, config=config_ck)
    model = Model(network, net_loss, net_opt, metrics={'Accuracy': Accuracy()})
    #epoch_size = 20
    #print('============== Starting Training =============')
    #model.train(epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()])

    # Haar cascade face detector; class labels hoisted out of the frame loop
    # (the original rebuilt the subjects list on every frame).
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')
    subjects = [str(i) for i in range(34)]
    cap = cv2.VideoCapture(0)
    try:
        stop = False
        while not stop:
            success, img = cap.read()
            if not success:
                # Frame grab failed (original ignored this flag); retry.
                continue
            # Keep a copy so drawings do not pollute the frame used for detection.
            img1 = img.copy()
            rect = face_cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=5,
                                                 minSize=(30, 30),
                                                 flags=cv2.CASCADE_SCALE_IMAGE)
            if len(rect) == 0:
                cv2.putText(img1, 'no face!', (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1,
                            (128, 128, 0), 2)
            for (x, y, w, h) in rect:
                # BUG FIX: rows are indexed by y/h and columns by x/w (the
                # original used gray[y:y+w, x:x+h], swapping the extents).
                # Also crop the COLOR frame, not the grayscale one, so the
                # input has 3 channels and matches the training preprocessing
                # in TrainDatasetGenerator ((H, W, C) -> transpose -> /255).
                face = img[y:y + h, x:x + w]
                face = cv2.resize(face, (100, 100))
                face = face.transpose().astype(np.float32) / 255.
                # (3, 100, 100) -> (1, 3, 100, 100): NCHW with batch size 1.
                face = Tensor(np.expand_dims(face, axis=0))
                cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)
                output = network(face)
                predicted_class = np.argmax(output.asnumpy(), axis=1)
                label = subjects[predicted_class[0]]
                cv2.putText(img1, label, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1,
                            (128, 128, 0), 2)
            cv2.imshow('img', img1)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to quit
                stop = True
        cv2.destroyAllWindows()
    finally:
        # BUG FIX: release the camera even if an error interrupts the loop
        # (the original never called cap.release()).
        cap.release()
if __name__ == '__main__':
    # Script entry point: run the webcam recognition demo defined above.
    train_resnet()
# 为什么face是3维的,使用了face = np.expand_dims(face, axis=0) 也没有变成4维
## face 仍然是三维数组的原因
原代码中的 face 取自灰度图 gray,裁剪并 resize 后是形状为 (100, 100) 的二维数组;np.expand_dims(face, axis=0) 只增加一个维度,得到 (1, 100, 100),因此仍然是三维数组。而模型第一层卷积需要 3 个输入通道,即形状为 (batch_size, channels, height, width) = (1, 3, 100, 100) 的四维张量。正确做法是从彩色图像裁剪出 (100, 100, 3) 的区域,先 transpose 得到 (3, 100, 100)(与训练时 TrainDatasetGenerator 的预处理一致),再用 np.expand_dims 扩展为 (1, 3, 100, 100),最后用 Tensor 转换为 MindSpore 张量。
代码解释
- 模型定义: 定义 ResNet 模型,包括 ResidualBlock 和 ResNet 类,并加载预训练好的模型参数。
- 数据集加载: 使用 GeneratorDataset 加载训练数据集,并进行 shuffle 和 batch 操作。
- 训练: 使用 Model 类进行模型训练,设置损失函数、优化器和评估指标。
- 人脸识别: 使用加载的模型对摄像头实时捕获的图像进行人脸识别,并将识别结果显示在图像上。
注意
- 代码中的路径需要根据实际情况修改。
- 训练模型需要一定时间,建议使用 GPU 训练。
- 为了方便演示,代码没有进行过多的参数调整,可以根据实际情况进行优化。
- 本代码只是展示了使用 MindSpore 进行人脸识别的基本流程,实际应用中可能需要更复杂的算法和模型。
总结
本代码展示了如何使用 MindSpore 训练 ResNet 模型并进行人脸识别,并解释了在使用 Tensor 转换时需要注意的细节。希望本代码能帮助您更好地理解 MindSpore 的使用和深度学习应用。
原文地址: https://www.cveoy.top/t/topic/jqzy 著作权归作者所有。请勿转载和采集!