人脸姿态估计代码解析:原理、步骤与优化
这段代码实现了人脸姿态估计的功能,主要包括以下步骤:
- 使用 dlib 库中的函数,通过预训练的人脸检测模型找到人脸的位置信息。
- 使用 dlib 库中的函数,通过预训练的人脸关键点检测模型找到人脸的 68 个关键点的位置信息。
- 使用以上得到的位置信息和事先定义好的 3D 人脸模型,通过 cv2 库中的函数计算人脸的旋转和平移向量。
- 使用以上得到的旋转和平移向量,通过 cv2 库中的函数计算人脸的欧拉角。
- 将以上得到的位置信息、旋转向量、平移向量和欧拉角可视化到原始图像上。
- 针对不同输入图片,使用以上步骤进行迭代处理,得到最终的输出图片和欧拉角信息。
代码中还使用了计时器、滑动窗口平均等技术对算法进行优化。
代码解析:
import argparse
import os
import os.path as osp

import cv2
import dlib
import numpy as np

from timer import Timer
from utils import Annotator
t = Timer()
class HeadposeDetection():
    """Estimate head pose (Euler angles) from a single face in an image.

    Pipeline:
      1. dlib frontal face detector       -> face bounding box
      2. dlib 68-point shape predictor    -> 2D facial landmarks
      3. cv2.solvePnP vs. fixed 3D model  -> rotation/translation vectors
      4. cv2.decomposeProjectionMatrix    -> Euler angles in degrees
    """

    # 3D facial model coordinates (model units), one array per supported
    # landmark subset: index 0 = 6 points, 1 = 14 points, 2 = 5 points.
    # Each row pairs with the dlib landmark index listed in
    # lm_2d_index_list at the same position.
    landmarks_3d_list = [
        np.array([
            [ 0.000,  0.000,  0.000],    # Nose tip
            [ 0.000, -8.250, -1.625],    # Chin
            [-5.625,  4.250, -3.375],    # Left eye left corner
            [ 5.625,  4.250, -3.375],    # Right eye right corner
            [-3.750, -3.750, -3.125],    # Left mouth corner
            [ 3.750, -3.750, -3.125]     # Right mouth corner
        ], dtype=np.double),
        np.array([
            [ 0.000000,  0.000000,  6.763430],   # 52 nose bottom edge
            [ 6.825897,  6.760612,  4.402142],   # 33 left brow left corner
            [ 1.330353,  7.122144,  6.903745],   # 29 left brow right corner
            [-1.330353,  7.122144,  6.903745],   # 34 right brow left corner
            [-6.825897,  6.760612,  4.402142],   # 38 right brow right corner
            [ 5.311432,  5.485328,  3.987654],   # 13 left eye left corner
            [ 1.789930,  5.393625,  4.413414],   # 17 left eye right corner
            [-1.789930,  5.393625,  4.413414],   # 25 right eye left corner
            [-5.311432,  5.485328,  3.987654],   # 21 right eye right corner
            [ 2.005628,  1.409845,  6.165652],   # 55 nose left corner
            [-2.005628,  1.409845,  6.165652],   # 49 nose right corner
            [ 2.774015, -2.080775,  5.048531],   # 43 mouth left corner
            [-2.774015, -2.080775,  5.048531],   # 39 mouth right corner
            [ 0.000000, -3.116408,  6.097667],   # 45 mouth central bottom corner
            [ 0.000000, -7.415691,  4.070434]    # 6 chin corner
        ], dtype=np.double),
        np.array([
            [ 0.000000,  0.000000,  6.763430],   # 52 nose bottom edge
            [ 5.311432,  5.485328,  3.987654],   # 13 left eye left corner
            [ 1.789930,  5.393625,  4.413414],   # 17 left eye right corner
            [-1.789930,  5.393625,  4.413414],   # 25 right eye left corner
            [-5.311432,  5.485328,  3.987654]    # 21 right eye right corner
        ], dtype=np.double)
    ]

    # Indices into dlib's 68-point landmark set, matching row-for-row the
    # 3D model arrays above.
    lm_2d_index_list = [
        [30, 8, 36, 45, 48, 54],
        [33, 17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8],  # 14 points
        [33, 36, 39, 42, 45]                                          # 5 points
    ]

    def __init__(self, lm_type=1, predictor='shape_predictor_68_face_landmarks.dat', verbose=True):
        """Load detector/predictor models and select the landmark subset.

        lm_type   -- which entry of landmarks_3d_list / lm_2d_index_list to use
        predictor -- path to the dlib shape-predictor data file
        verbose   -- if True, print per-stage timing during processing
        """
        # Returns face locations as rectangles (top-left / bottom-right corners).
        self.bbox_detector = dlib.get_frontal_face_detector()
        # Pre-trained 68-point facial landmark model.
        self.landmark_predictor = dlib.shape_predictor(predictor)
        self.lm_2d_index = self.lm_2d_index_list[lm_type]
        self.landmarks_3d = self.landmarks_3d_list[lm_type]
        self.v = verbose

    def to_numpy(self, landmarks):
        """Extract the selected landmark subset as an (N, 2) int array."""
        coords = []
        for i in self.lm_2d_index:
            coords += [[landmarks.part(i).x, landmarks.part(i).y]]
        # np.int was removed in NumPy 1.24; use the builtin int dtype.
        return np.array(coords).astype(int)

    def get_landmarks(self, im):
        """Detect the first face in `im` and return (landmarks_2d, bbox).

        Returns (None, None) when no face is found (or `im` is None).
        landmarks_2d is an (N, 2) float array; bbox is [left, top, right, bottom].
        """
        # Detect bounding boxes of faces
        t.tic('bb')
        rects = self.bbox_detector(im, 0) if im is not None else []
        if self.v:
            print(', bb: %.2f' % t.toc('bb'), end='ms')
        if len(rects) > 0:
            # Detect landmarks of the first face only.
            t.tic('lm')
            landmarks_2d = self.landmark_predictor(im, rects[0])
            # Choose specific landmarks corresponding to the 3D facial model.
            landmarks_2d = self.to_numpy(landmarks_2d)
            if self.v:
                print(', lm: %.2f' % t.toc('lm'), end='ms')
            rect = [rects[0].left(), rects[0].top(), rects[0].right(), rects[0].bottom()]
            return landmarks_2d.astype(np.double), rect
        else:
            return None, None

    def get_headpose(self, im, landmarks_2d, verbose=False):
        """Solve PnP for the face pose.

        Returns (rotation_vector, translation_vector, camera_matrix, dist_coeffs).
        The camera is approximated with focal length = image width and the
        principal point at the image center.
        """
        h, w, c = im.shape
        f = w                        # column size = x axis length (focal length)
        u0, v0 = w / 2, h / 2        # center of image plane
        camera_matrix = np.array(
            [[f, 0, u0],
             [0, f, v0],
             [0, 0, 1]], dtype=np.double
        )
        # Assuming no lens distortion
        dist_coeffs = np.zeros((4, 1))
        # Find rotation, translation
        (success, rotation_vector, translation_vector) = cv2.solvePnP(
            self.landmarks_3d, landmarks_2d, camera_matrix, dist_coeffs)
        if verbose:
            # Original had string literals split across physical lines
            # (a syntax error); use '\n' escapes instead.
            print('Camera Matrix:\n{0}'.format(camera_matrix))
            print('Distortion Coefficients:\n{0}'.format(dist_coeffs))
            print('Rotation Vector:\n{0}'.format(rotation_vector))
            print('Translation Vector:\n{0}'.format(translation_vector))
        return rotation_vector, translation_vector, camera_matrix, dist_coeffs

    def get_angles(self, rvec, tvec):
        """Convert a rotation/translation vector pair to Euler angles.

        Returns [rx, ry, rz] in degrees (sign flipped from
        cv2.decomposeProjectionMatrix's convention).
        """
        rmat = cv2.Rodrigues(rvec)[0]
        P = np.hstack((rmat, tvec))              # projection matrix [R | t]
        degrees = -cv2.decomposeProjectionMatrix(P)[6]
        rx, ry, rz = degrees[:, 0]
        return [rx, ry, rz]

    # Moving-average history, one list per tracked quantity.
    # NOTE(review): this is a CLASS attribute, so the history is shared by
    # all HeadposeDetection instances — confirm single-instance use.
    history = {'lm': [], 'bbox': [], 'rvec': [], 'tvec': [], 'cm': [], 'dc': []}

    def add_history(self, values):
        """Append one frame's results; `values` must match the key order above."""
        # Relies on dict insertion order (guaranteed since Python 3.7).
        for (key, value) in zip(self.history, values):
            self.history[key] += [value]

    def pop_history(self):
        """Drop the oldest frame from every history list."""
        for key in self.history:
            self.history[key].pop(0)

    def get_history_len(self):
        """Number of frames currently held (all lists stay the same length)."""
        return len(self.history['lm'])

    def get_ma(self):
        """Element-wise mean over the history window, in key order."""
        res = []
        for key in self.history:
            res += [np.mean(self.history[key], axis=0)]
        return res

    def process_image(self, im, draw=True, ma=3):
        """Run the full pipeline on a BGR image.

        draw -- annotate the result onto the image when True
        ma   -- moving-average window size; <= 1 disables smoothing
        Returns (image, angles); angles is None when no face is detected.
        """
        # Landmark detection works on grayscale.
        im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        landmarks_2d, bbox = self.get_landmarks(im_gray)
        # If no face detected, return the original image unchanged.
        if landmarks_2d is None:
            return im, None
        # Head pose detection
        t.tic('hp')
        rvec, tvec, cm, dc = self.get_headpose(im, landmarks_2d)
        if self.v:
            print(', hp: %.2f' % t.toc('hp'), end='ms')
        if ma > 1:
            # Smooth all quantities over a sliding window of `ma` frames.
            self.add_history([landmarks_2d, bbox, rvec, tvec, cm, dc])
            if self.get_history_len() > ma:
                self.pop_history()
            landmarks_2d, bbox, rvec, tvec, cm, dc = self.get_ma()
        t.tic('ga')
        angles = self.get_angles(rvec, tvec)
        if self.v:
            print(', ga: %.2f' % t.toc('ga'), end='ms')
        if draw:
            t.tic('draw')
            annotator = Annotator(im, angles, bbox, landmarks_2d, rvec, tvec, cm, dc, b=10.0)
            im = annotator.draw_all()
            if self.v:
                print(', draw: %.2f' % t.toc('draw'), end='ms' + ' ' * 10)
        return im, angles
def main(args):
    """Run head pose estimation on every image in args['input_dir'].

    Writes each annotated result to args['output_dir'] as '<name>_out.png'.
    Expects args to contain 'input_dir', 'output_dir', 'landmark_type'
    and 'landmark_predictor' (as built by the argparse block below).
    """
    in_dir = args['input_dir']
    out_dir = args['output_dir']
    # Initialize head pose detection
    hpd = HeadposeDetection(args['landmark_type'], args['landmark_predictor'])
    for filename in os.listdir(in_dir):
        name, ext = osp.splitext(filename)
        # Case-insensitive so '.JPG' / '.PNG' are not silently skipped.
        if ext.lower() in ('.jpg', '.png', '.gif'):
            print('> image:', filename, end='')
            image = cv2.imread(osp.join(in_dir, filename))
            if image is None:
                # cv2.imread returns None for unreadable files (it generally
                # cannot decode GIFs); skip instead of crashing in cvtColor.
                print(' -> unreadable, skipped', end='')
            else:
                res, angles = hpd.process_image(image)
                cv2.imwrite(osp.join(out_dir, name + '_out.png'), res)
        else:
            print('> skip:', filename, end='')
        # Terminate the line built up with end='' prints above.
        print('')
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # NOTE(review): the default input '9.png' names a file, but main() treats
    # input_dir as a directory (os.listdir) — confirm the intended default.
    parser.add_argument('-i', metavar='DIR', dest='input_dir', default=r'9.png')
    parser.add_argument('-o', metavar='DIR', dest='output_dir', default='res/')
    parser.add_argument('-lt', metavar='N', dest='landmark_type', type=int, default=1,
                        help='Landmark type.')
    parser.add_argument('-lp', metavar='FILE', dest='landmark_predictor',
                        default='shape_predictor_68_face_landmarks.dat',
                        help='Landmark predictor data file.')
    args = vars(parser.parse_args())
    # makedirs(exist_ok=True) handles nested paths and the check-then-create
    # race that osp.exists + os.mkdir had.
    os.makedirs(args['output_dir'], exist_ok=True)
    # Normalize both paths to end with '/' for the path handling in main().
    if args['output_dir'][-1] != '/':
        args['output_dir'] += '/'
    if args['input_dir'][-1] != '/':
        args['input_dir'] += '/'
    main(args)
代码优化:
- **计时器:** 代码使用 Timer 类记录关键步骤的执行时间,方便分析算法性能。
- **滑动窗口平均:** 代码通过 history 字典记录多个帧的计算结果,并使用滑动窗口平均计算最终结果,平滑噪声,提升稳定性。
总结:
这段代码通过 dlib 和 OpenCV 库实现了人脸姿态估计功能,代码步骤清晰,并结合计时器和滑动窗口平均等优化技术,提升算法效率。
原文地址: https://www.cveoy.top/t/topic/nDTT 著作权归作者所有。请勿转载和采集!