ResNet 人脸识别模型训练与推理 - MindSpore 实践 - 常规

from collections import defaultdict, Counter
from mindspore.train.serialization import load_checkpoint, load_param_into_net
import numpy as np
import mindspore.dataset as ds
import cv2
import mindspore.nn as nn
import os
from mindspore import context, ops, Tensor
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
np.random.seed(58)


class ResidualBlock(nn.Cell):
    expansion = 1
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride,   pad_mode='same')
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1,   pad_mode='same')
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.stride = stride

    def construct(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)

        return out

class ResNet(nn.Cell):
    def __init__(self, block, layers, num_classes=34):
        super(ResNet, self).__init__()
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2,   pad_mode='valid')
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2,   pad_mode='valid')

        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AvgPool2d(kernel_size=3, stride=1,   pad_mode='valid')
        self.fc = nn.Dense(512 * block.expansion, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels * block.expansion):
            downsample = nn.SequentialCell([
                nn.Conv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels * block.expansion)
            ])
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.SequentialCell(layers)

    def construct(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = ops.Reshape()(x, (ops.Shape()(x)[0], -1))
        x = self.fc(x)

        return x


class TrainDatasetGenerator:
    def __init__(self, file_path):
        self.file_path = file_path
        self.img_names = os.listdir(file_path)

    def __getitem__(self, index=0):
        data = cv2.imread(os.path.join(self.file_path, self.img_names[index]))
        label = int(self.img_names[index].split('-')[0])
        data = cv2.resize(data,(100,100))
        data = data.transpose().astype(np.float32) / 255.
        return data, label

    def __len__(self):
        return len(self.img_names)

def load_model_from_ckpt():
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    # 创建ResNet模型
    network = ResNet(ResidualBlock,[2,2,2,2])
    # 加载ckpt文件中的模型参数
    param_dict = load_checkpoint('D:/pythonproject2/ckpt/checkpoint_resnet_1-20_49.ckpt')
     #将模型参数加载到模型中
    load_param_into_net(network, param_dict)
    # 返回模型
    return network

def train_resnet():
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    train_dataset_generator = TrainDatasetGenerator('D:/pythonproject2/digital_mindspore/dataset')
    ds_train = ds.GeneratorDataset(train_dataset_generator, ['data', 'label'], shuffle=True)
    ds_train = ds_train.shuffle(buffer_size=10)
    ds_train = ds_train.batch(batch_size=4, drop_remainder=True)
    network = load_model_from_ckpt()
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.001, momentum=0.9)
    #time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
    #config_ck = CheckpointConfig(save_checkpoint_steps=10,keep_checkpoint_max=10)
    #config_ckpt_path = 'D:/pythonproject2/ckpt/'
    #ckpoint_cb = ModelCheckpoint(prefix='checkpoint_resnet', directory=config_ckpt_path, config=config_ck)

    model = Model(network, net_loss, net_opt, metrics={'Accuracy': Accuracy()})
    #epoch_size = 20
    #print('============== Starting Training =============')
    #model.train(epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()])
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')  # 加载检测器
    # 训练阶段

    cap = cv2.VideoCapture(0)
    stop = False
    while not stop:
        success, img = cap.read()
        subjects = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17',
                    '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33']
        # 生成图像的副本，这样就能保留原始图像
        img1 = img.copy()
        # 检测人脸
        # 将测试图像转换为灰度图像，因为opencv人脸检测器需要灰度图像
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # 检测多尺度图像，返回值是一张脸部区域信息的列表（x,y,宽,高）
        rect = face_cascade.detectMultiScale(img, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30),
                                             flags=cv2.CASCADE_SCALE_IMAGE)
        # 如果未检测到面部
        if len(rect) == 0:
            txt = 'no face!'
            cv2.putText(img1, txt, (10, 20), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
        if not rect is None:
            for (x, y, w, h) in rect:
                face = gray[y:y + w, x:x + h].astype(np.float32)  # 数值转换
                face = cv2.resize(face, (100, 100))
                face = face.transpose().astype(np.float32) / 255.
                face = np.expand_dims(face, axis=0)  # 扩展维度，变成(batch_size, channels, height, width)
                face = Tensor(face)
                cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)  # 画出矩形框
                output =network(face)
                predicted_class = np.argmax(output.asnumpy(),axis=1)
                label = subjects[predicted_class[0]]
                #if min_d < 200000000000:
                cv2.putText(img1, label, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
                #else:
                 #   label = 'unknown'
                  #  cv2.putText(img1, label, (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (128, 128, 0), 2)
        cv2.imshow('img', img1)
        if (cv2.waitKey(1) & 0xFF == ord('q')):  # 按下q程序结束
                stop = True
                cv2.destroyAllWindows()  # 释放窗口

if __name__ == '__main__':
    train_resnet()

报错分析：

该报错提示 Conv2D 的输入 shape 需要是 4 维，但是输入的数据只有 3 维。可能是在调用 Conv2D 时输入的数据维度不正确。

解决方法：

检查输入数据的维度是否正确，需要是 (batch_size, channel, height, width) 的形式。在代码中，你需要确保 face 的维度为 (1, 1, 100, 100) 才能正确地传入 Conv2D。

face = gray[y:y + w, x:x + h].astype(np.float32)  # 数值转换
face = cv2.resize(face, (100, 100))
face = face.transpose().astype(np.float32) / 255.
face = np.expand_dims(face, axis=0)  # 扩展维度，变成(batch_size, channels, height, width)
face = np.expand_dims(face, axis=0)  # 再次扩展维度，变成(1, 1, 100, 100)
face = Tensor(face)

其他建议：

在代码中添加必要的注释，方便理解代码逻辑。
使用 print 语句打印关键变量的维度，方便调试。
考虑使用 Dataset 类来加载和处理数据集，可以提高代码的可读性和可维护性。
使用 Model 类来进行模型训练和推理，可以简化代码并提高代码的可复用性。
使用 ModelCheckpoint 类来保存模型训练过程中的参数，方便后续进行模型恢复或推理。

代码优化：

# ... (代码其他部分)

# 训练阶段
for epoch in range(20):  # 训练 20 个 epoch
    for data, label in ds_train:
        # ... (进行模型训练)

# 推理阶段
cap = cv2.VideoCapture(0)
while True:
    success, img = cap.read()
    # ... (进行人脸检测和识别)
    cv2.imshow('img', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()

总结：

通过以上步骤，你可以成功地使用 MindSpore 框架训练 ResNet 模型进行人脸识别，并使用训练好的模型进行实时人脸识别推理。

注意：

该代码仅供参考，需要根据实际情况进行修改和完善。
训练模型需要准备充足的训练数据，才能获得较好的识别效果。
为了提高识别精度，需要进行模型调优，例如调整模型参数、优化网络结构等。
使用该代码进行人脸识别，需要遵守相关法律法规和道德准则。