ResNet 人脸识别模型训练与实时预测
代码中的注释认为 'face' 在经过 np.expand_dims(face, axis=0) 后应该变成 4 维,但实际上并没有。原因在于裁剪出的人脸取自灰度图,只有 (高, 宽) 两个维度,扩展一维后仅为 3 维 (1, 高, 宽);而网络第一层卷积要求 3 通道输入,期望的形状是 (1, 3, 高, 宽)。可以在 print(face.shape) 处打印 'face' 的维度加以确认;若要修复,应改为裁剪彩色图像并按训练时的方式做 transpose 预处理,再用 np.expand_dims 增加 batch 维度。
# Standard library / third-party imports.
from collections import defaultdict, Counter
from mindspore.train.serialization import load_checkpoint, load_param_into_net
import numpy as np
import mindspore.dataset as ds
import cv2
import mindspore.nn as nn
import os
from mindspore import context, ops, Tensor
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
# Fix the NumPy RNG seed so dataset shuffling is reproducible across runs.
np.random.seed(58)
class ResidualBlock(nn.Cell):
    """Basic two-convolution residual block (ResNet-18/34 style)."""

    expansion = 1  # output channel multiplier for this block type

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        # Main path: two 3x3 convolutions, each followed by batch norm.
        # Only the first conv applies the (possibly > 1) stride.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, pad_mode='same')
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, pad_mode='same')
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Optional projection applied to the shortcut when shapes differ.
        self.downsample = downsample
        self.stride = stride

    def construct(self, x):
        """Return relu(F(x) + shortcut(x))."""
        shortcut = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if self.downsample is not None:
            shortcut = self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Cell):
    """ResNet backbone assembled from `block`, with one stage per entry in `layers`.

    `num_classes` defaults to 34 (the face-identity label set used by this script).
    """

    def __init__(self, block, layers, num_classes=34):
        super(ResNet, self).__init__()
        self.in_channels = 64
        # Stem: 7x7/2 convolution + BN + ReLU + 3x3/2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, pad_mode='valid')
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='valid')
        # Four residual stages; stages 2-4 halve the spatial resolution.
        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)
        # Fixed 3x3 average pool (not adaptive/global), then the classifier head.
        self.avgpool = nn.AvgPool2d(kernel_size=3, stride=1, pad_mode='valid')
        self.fc = nn.Dense(512 * block.expansion, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Stack `blocks` residual blocks; only the first may change stride/width."""
        downsample = None
        needs_projection = stride != 1 or self.in_channels != out_channels * block.expansion
        if needs_projection:
            # 1x1 projection so the shortcut matches the main path's shape.
            downsample = nn.SequentialCell([
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels * block.expansion),
            ])
        stage = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels * block.expansion
        stage.extend(block(self.in_channels, out_channels) for _ in range(blocks - 1))
        return nn.SequentialCell(stage)

    def construct(self, x):
        """Forward pass: stem -> 4 residual stages -> pool -> flatten -> logits."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension before the Dense head.
        x = ops.Reshape()(x, (ops.Shape()(x)[0], -1))
        return self.fc(x)
class TrainDatasetGenerator:
    """Yields (image, label) pairs from a directory of images named 'label-xxx'."""

    def __init__(self, file_path):
        self.file_path = file_path
        self.img_names = os.listdir(file_path)

    def __getitem__(self, index=0):
        name = self.img_names[index]
        image = cv2.imread(os.path.join(self.file_path, name))
        # The class label is encoded as the filename prefix before the first '-'.
        label = int(name.split("-")[0])
        image = cv2.resize(image, (100, 100))
        # transpose() reverses all axes: (H, W, C) -> (C, W, H); scale to [0, 1].
        image = image.transpose().astype(np.float32) / 255.
        return image, label

    def __len__(self):
        return len(self.img_names)
def load_model_from_ckpt(ckpt_path='D:/pythonproject2/ckpt/checkpoint_resnet_1-20_49.ckpt'):
    """Build a ResNet-18-style network and load trained weights into it.

    Args:
        ckpt_path: path to a MindSpore checkpoint file. Defaults to the
            original hard-coded location, so existing callers are unaffected.

    Returns:
        The ResNet network with the checkpoint parameters loaded.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    # [2, 2, 2, 2] basic blocks per stage == the ResNet-18 layout.
    network = ResNet(ResidualBlock, [2, 2, 2, 2])
    param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(network, param_dict)
    return network
def train_resnet():
    """Load the trained ResNet and run real-time webcam face recognition.

    Despite the name, the actual training loop is commented out (as in the
    original script); this function only performs live prediction. Press 'q'
    in the preview window to quit.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    train_dataset_generator = TrainDatasetGenerator('D:/pythonproject2/digital_mindspore/dataset')
    ds_train = ds.GeneratorDataset(train_dataset_generator, ["data", "label"], shuffle=True)
    ds_train = ds_train.shuffle(buffer_size=10)
    ds_train = ds_train.batch(batch_size=4, drop_remainder=True)
    network = load_model_from_ckpt()
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.001, momentum=0.9)
    model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    # Training callbacks/loop kept disabled, exactly as in the original script:
    # time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
    # config_ck = CheckpointConfig(save_checkpoint_steps=10, keep_checkpoint_max=10)
    # ckpoint_cb = ModelCheckpoint(prefix="checkpoint_resnet",
    #                              directory='D:/pythonproject2/ckpt/', config=config_ck)
    # model.train(20, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()])

    # Haar-cascade face detector for the live prediction loop.
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')
    # Class names are the digit strings '0'..'33' (hoisted out of the frame loop).
    subjects = [str(i) for i in range(34)]
    cap = cv2.VideoCapture(0)
    stop = False
    while not stop:
        success, img = cap.read()
        if not success:
            # Camera frame grab failed; skip this iteration instead of crashing.
            continue
        # Draw on a copy so the original frame stays untouched.
        img1 = img.copy()
        # Detect faces; returns a sequence of (x, y, w, h) rectangles
        # (an empty sequence when nothing is found, never None).
        rect = face_cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30),
                                             flags=cv2.CASCADE_SCALE_IMAGE)
        if len(rect) == 0:
            cv2.putText(img1, 'no face!', (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
        for (x, y, w, h) in rect:
            # BUG FIX: slice rows by height and columns by width (the original
            # had them swapped), and crop the 3-channel BGR frame — the
            # network's first conv expects 3 input channels, so the original
            # grayscale crop produced shape (1, 100, 100) after expand_dims
            # instead of the required (1, 3, 100, 100).
            face = img[y:y + h, x:x + w]
            face = cv2.resize(face, (100, 100))
            # Same preprocessing as TrainDatasetGenerator: transpose to
            # channels-first and scale to [0, 1].
            face = face.transpose().astype(np.float32) / 255.
            face = np.expand_dims(face, axis=0)  # -> (1, 3, 100, 100)
            face = Tensor(face)
            cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)
            output = network(face)
            predicted_class = np.argmax(output.asnumpy(), axis=1)
            label = subjects[predicted_class[0]]
            cv2.putText(img1, label, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
        cv2.imshow('img', img1)
        if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to stop
            stop = True
    cap.release()  # release the camera device
    cv2.destroyAllWindows()  # close the preview window
# Script entry point: run the live face-recognition loop.
if __name__ == "__main__":
    train_resnet()
建议在代码中通过 print(face.shape) 打印 face 的维度,确认其是否已经是 4 维。
补充说明:
- MindSpore 的 Tensor 类本身可以处理多维数据;face 在经过 np.expand_dims 之后不一定还需要再转换成 Tensor,可以尝试直接将 face 传入 network 进行预测,观察结果是否一致。
- np.expand_dims 的作用是增加一个维度;如果原数据已经是多维的,扩展后得到的维度数可能与预期不符。因此建议先确认原数据的维度,再决定是否使用 np.expand_dims。
- 使用 MindSpore 进行人脸识别时,可以考虑使用 mindspore.dataset.vision.c_transforms 中的图像预处理函数(例如 Resize 和 Normalize),方便地对图像进行预处理,提高模型的识别精度。
- 可以参考 MindSpore 的官方文档和示例代码,学习更多关于使用 MindSpore 进行人脸识别的知识。
原文地址: https://www.cveoy.top/t/topic/jqzv 著作权归作者所有。请勿转载和采集!