基于ResNet的人脸识别系统:优化识别速度和准确性
def train_resnet():
    """Run live webcam face recognition against the training set.

    Despite its name, this function does not currently train anything: the
    ``model.train`` call is commented out. It builds the dataset pipeline and
    the model objects, caches the training samples in memory once, and then
    loops over webcam frames. Each detected face is labelled with the label of
    the training sample that has the smallest squared Euclidean distance to
    it. The loop ends when 'q' is pressed or the camera stops delivering
    frames; the camera and the preview window are always released.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    train_dataset_generator = TrainDatasetGenerator('D:/pythonproject2/digital_mindspore/dataset')
    ds_train = ds.GeneratorDataset(train_dataset_generator, ['data', 'label'], shuffle=True)
    ds_train = ds_train.shuffle(buffer_size=10)
    ds_train = ds_train.batch(batch_size=4, drop_remainder=True)

    # Model objects are still constructed so that re-enabling the training
    # lines below keeps working; the recognition loop itself does not use them.
    network = load_model_from_ckpt()
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.001, momentum=0.9)
    model = Model(network, net_loss, net_opt, metrics={'Accuracy': Accuracy()})
    # NOTE: training is disabled. To re-enable it, uncomment:
    # time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
    # config_ck = CheckpointConfig(save_checkpoint_steps=10, keep_checkpoint_max=10)
    # config_ckpt_path = 'D:/pythonproject2/ckpt/'
    # ckpoint_cb = ModelCheckpoint(prefix='checkpoint_resnet', directory=config_ckpt_path,
    #                              config=config_ck)
    # epoch_size = 20
    # print('============== Starting Training =============')
    # model.train(epoch_size, ds_train, callbacks=[time_cb, ckpoint_cb, LossMonitor()])

    # Read the training set exactly once. Re-iterating the shuffled, batched
    # dataset for every face made the predicted label change between frames
    # and dominated the per-frame cost.
    train_samples = _load_training_samples(ds_train)

    # Display names indexed by integer label: '0' .. '33'.
    subjects = [str(i) for i in range(34)]
    # Distances at or above this value are reported as 'unknown'.
    max_match_distance = 200000000000
    font = cv2.FONT_HERSHEY_COMPLEX
    text_color = (128, 128, 0)

    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')  # load the detector
    cap = cv2.VideoCapture(0)
    try:
        while True:
            success, img = cap.read()
            if not success:
                # No frame available (camera missing or disconnected).
                break
            # Draw on a copy so the captured frame stays untouched.
            img1 = img.copy()
            # The cascade detector works on grayscale images.
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Returns a sequence of face rectangles (x, y, width, height).
            rect = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
                                                 minSize=(30, 30),
                                                 flags=cv2.CASCADE_SCALE_IMAGE)
            if len(rect) == 0:
                cv2.putText(img1, 'no face!', (10, 20), font, 1, text_color, 2)
            for (x, y, w, h) in rect:
                # Rows are indexed by y/height and columns by x/width.
                face = gray[y:y + h, x:x + w].astype(np.float32)
                face = cv2.resize(face, (100, 100))
                cv2.rectangle(img1, (x, y), (x + w, y + h), (0, 255, 0), 2)  # face box
                label, distance = _nearest_label(face, train_samples)
                if label is not None and distance < max_match_distance:
                    name = subjects[label]
                else:
                    name = 'unknown'
                cv2.putText(img1, name, (x, y), font, 1, text_color, 2)
            cv2.imshow('img', img1)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
                break
    finally:
        cap.release()  # release the camera even if a frame raised
        cv2.destroyAllWindows()  # close the preview window


def _load_training_samples(dataset):
    """Flatten one pass over a batched dataset into a list of (data, label) pairs.

    Assumes each batch dict holds 'data' and 'label' arrays with the batch
    dimension first, as produced by ``batch(...)`` -- TODO confirm against
    TrainDatasetGenerator.
    """
    samples = []
    for batch in dataset.create_dict_iterator(output_numpy=True):
        for data, label in zip(batch['data'], batch['label']):
            samples.append((data, int(label)))
    return samples


def _nearest_label(face, train_samples):
    """Return (label, squared distance) of the sample closest to ``face``.

    Returns ``(None, inf)`` when ``train_samples`` is empty.
    """
    best_label = None
    best_distance = float('inf')
    for data, label in train_samples:
        distance = ((face - data) ** 2).sum()  # squared Euclidean distance
        if distance < best_distance:
            best_distance = distance
            best_label = label
    return best_label, best_distance
# Script entry point: start the webcam recognition loop only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    train_resnet()
## 标签变化和优化方案
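1. **标签一直在变换的原因:每检测到一张人脸,程序都会重新遍历一次训练数据集;而数据集开启了 shuffle 并按 batch_size=4 分批,每次遍历得到的批次组合都不同。代码计算的是人脸与整个批次的距离,并取该批次中出现次数最多的标签,因此同一张人脸在不同帧上可能得到不同的识别结果。**
**优化方案:** 在程序开始前将训练数据集一次性读入内存,检测到人脸时直接使用内存中的数据逐样本计算距离。这样既避免了重复遍历,也消除了随机分批带来的结果波动。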
2. **可以使用特征提取的方式来降低距离计算的开销,而不是直接在原始像素上计算测试图像与所有训练图像的欧式距离。**
**具体做法:** 使用一个预训练好的模型(如ResNet)提取训练图像和测试图像的特征;训练图像的特征可以预先计算并缓存,识别时只需提取测试图像的特征,再计算特征之间的距离即可。特征向量的维度通常远低于原始图像,因此距离计算量会明显减少,识别速度得以提高;同时学习到的特征通常比原始像素更具判别力,也有助于提高识别准确性。
原文地址: https://www.cveoy.top/t/topic/jqvV 著作权归作者所有。请勿转载和采集!