这段代码实现了人脸姿态估计的功能,主要包括以下步骤:

  1. 使用 dlib 库中的函数,通过预训练的人脸检测模型找到人脸的位置信息。
  2. 使用 dlib 库中的函数,通过预训练的人脸关键点检测模型找到人脸的 68 个关键点的位置信息。
  3. 使用以上得到的位置信息和事先定义好的 3D 人脸模型,通过 cv2 库中的函数计算人脸的旋转和平移向量。
  4. 使用以上得到的旋转和平移向量,通过 cv2 库中的函数计算人脸的欧拉角。
  5. 将以上得到的位置信息、旋转向量、平移向量和欧拉角可视化到原始图像上。
  6. 针对不同输入图片,使用以上步骤进行迭代处理,得到最终的输出图片和欧拉角信息。

代码中还使用了计时器、滑动窗口平均等技术对算法进行优化。

代码解析:

import argparse
import os
import os.path as osp

import cv2
import dlib
import numpy as np

from timer import Timer
from utils import Annotator


# Module-level timer shared by all HeadposeDetection calls for per-stage profiling.
t = Timer()

class HeadposeDetection():
    """Estimate head pose (Euler angles) for the first face found in an image.

    Pipeline: dlib face detection -> dlib 68-point landmark detection ->
    cv2.solvePnP against a fixed 3D facial model -> Euler-angle extraction
    from the resulting rotation/translation. An optional moving average over
    recent frames smooths the estimates.
    """

    # 3D facial model coordinates (generic face, model units).
    # Index 0: 6-point model, index 1: 14-point model, index 2: 5-point model;
    # each pairs with the matching entry of lm_2d_index_list below.
    landmarks_3d_list = [
        np.array([
            [ 0.000,  0.000,   0.000],    # Nose tip
            [ 0.000, -8.250,  -1.625],    # Chin
            [-5.625,  4.250,  -3.375],    # Left eye left corner
            [ 5.625,  4.250,  -3.375],    # Right eye right corner
            [-3.750, -3.750,  -3.125],    # Left Mouth corner
            [ 3.750, -3.750,  -3.125]     # Right mouth corner
        ], dtype=np.double),
        np.array([
            [ 0.000000,  0.000000,  6.763430],   # 52 nose bottom edge
            [ 6.825897,  6.760612,  4.402142],   # 33 left brow left corner
            [ 1.330353,  7.122144,  6.903745],   # 29 left brow right corner
            [-1.330353,  7.122144,  6.903745],   # 34 right brow left corner
            [-6.825897,  6.760612,  4.402142],   # 38 right brow right corner
            [ 5.311432,  5.485328,  3.987654],   # 13 left eye left corner
            [ 1.789930,  5.393625,  4.413414],   # 17 left eye right corner
            [-1.789930,  5.393625,  4.413414],   # 25 right eye left corner
            [-5.311432,  5.485328,  3.987654],   # 21 right eye right corner
            [ 2.005628,  1.409845,  6.165652],   # 55 nose left corner
            [-2.005628,  1.409845,  6.165652],   # 49 nose right corner
            [ 2.774015, -2.080775,  5.048531],   # 43 mouth left corner
            [-2.774015, -2.080775,  5.048531],   # 39 mouth right corner
            [ 0.000000, -3.116408,  6.097667],   # 45 mouth central bottom corner
            [ 0.000000, -7.415691,  4.070434]    # 6 chin corner
        ], dtype=np.double),
        np.array([
            [ 0.000000,  0.000000,  6.763430],   # 52 nose bottom edge
            [ 5.311432,  5.485328,  3.987654],   # 13 left eye left corner
            [ 1.789930,  5.393625,  4.413414],   # 17 left eye right corner
            [-1.789930,  5.393625,  4.413414],   # 25 right eye left corner
            [-5.311432,  5.485328,  3.987654]    # 21 right eye right corner
        ], dtype=np.double)
    ]

    # Indices into the 68-point dlib landmark set that correspond, in order,
    # to the rows of the matching 3D model above.
    lm_2d_index_list = [
        [30, 8, 36, 45, 48, 54],
        [33, 17, 21, 22, 26, 36, 39, 42, 45, 31, 35, 48, 54, 57, 8], # 14 points
        [33, 36, 39, 42, 45] # 5 points
    ]

    def __init__(self, lm_type=1, predictor='shape_predictor_68_face_landmarks.dat', verbose=True):
        """Load the dlib detectors and select a landmark/model pairing.

        Args:
            lm_type: index into lm_2d_index_list / landmarks_3d_list (0, 1 or 2).
            predictor: path to the pretrained 68-point shape predictor data file.
            verbose: when True, print per-stage timing to stdout.
        """
        # Frontal face detector: returns face bounding rectangles.
        self.bbox_detector = dlib.get_frontal_face_detector()
        # Pretrained 68-point facial landmark predictor.
        self.landmark_predictor = dlib.shape_predictor(predictor)

        self.lm_2d_index = self.lm_2d_index_list[lm_type]
        self.landmarks_3d = self.landmarks_3d_list[lm_type]

        self.v = verbose

        # Moving-average history, one list per tracked quantity.
        # NOTE: was a class attribute, which silently shared state across
        # instances; it is now per-instance.
        self.history = {'lm': [], 'bbox': [], 'rvec': [], 'tvec': [], 'cm': [], 'dc': []}

    def to_numpy(self, landmarks):
        """Extract the selected 2D landmarks as an (N, 2) integer array."""
        coords = []
        for i in self.lm_2d_index:
            coords += [[landmarks.part(i).x, landmarks.part(i).y]]
        # np.int was removed in NumPy 1.24; the builtin int is the equivalent.
        return np.array(coords).astype(int)

    def get_landmarks(self, im):
        """Detect the first face in *im* and return (landmarks_2d, bbox).

        Returns (None, None) when *im* is None or no face is found.
        """
        # Detect bounding boxes of faces
        t.tic('bb')
        rects = self.bbox_detector(im, 0) if im is not None else []

        if self.v:
            print(', bb: %.2f' % t.toc('bb'), end='ms')

        if len(rects) > 0:
            # Detect landmarks of the first face only
            t.tic('lm')
            landmarks_2d = self.landmark_predictor(im, rects[0])

            # Choose specific landmarks corresponding to 3D facial model
            landmarks_2d = self.to_numpy(landmarks_2d)
            if self.v:
                print(', lm: %.2f' % t.toc('lm'), end='ms')

            rect = [rects[0].left(), rects[0].top(), rects[0].right(), rects[0].bottom()]

            return landmarks_2d.astype(np.double), rect

        else:
            return None, None

    def get_headpose(self, im, landmarks_2d, verbose=False):
        """Solve PnP for the face pose.

        Returns (rotation_vector, translation_vector, camera_matrix, dist_coeffs).
        """
        h, w, c = im.shape
        f = w # column size = x axis length (approximate focal length)
        u0, v0 = w / 2, h / 2 # center of image plane
        camera_matrix = np.array(
            [[f, 0, u0],
             [0, f, v0],
             [0, 0, 1]], dtype = np.double
         )

        # Assuming no lens distortion
        dist_coeffs = np.zeros((4,1))

        # Find rotation, translation
        (success, rotation_vector, translation_vector) = cv2.solvePnP(self.landmarks_3d, landmarks_2d, camera_matrix, dist_coeffs)

        if verbose:
            # Strings were previously split across physical lines (a syntax
            # error); restored with explicit newline escapes.
            print('Camera Matrix:\n {0}'.format(camera_matrix))
            print('Distortion Coefficients:\n {0}'.format(dist_coeffs))
            print('Rotation Vector:\n {0}'.format(rotation_vector))
            print('Translation Vector:\n {0}'.format(translation_vector))

        return rotation_vector, translation_vector, camera_matrix, dist_coeffs

    def get_angles(self, rvec, tvec):
        """Convert a rotation vector to Euler angles [rx, ry, rz] in degrees."""
        rmat = cv2.Rodrigues(rvec)[0]
        P = np.hstack((rmat, tvec)) # projection matrix [R | t]
        degrees = -cv2.decomposeProjectionMatrix(P)[6]
        rx, ry, rz = degrees[:, 0]
        return [rx, ry, rz]

    def add_history(self, values):
        """Append one frame's values; *values* must follow the history key order."""
        for (key, value) in zip(self.history, values):
            self.history[key] += [value]

    def pop_history(self):
        """Drop the oldest frame from every history list."""
        for key in self.history:
            self.history[key].pop(0)

    def get_history_len(self):
        """Number of frames currently held in the history."""
        return len(self.history['lm'])

    def get_ma(self):
        """Element-wise mean over the history, in history key order."""
        res = []
        for key in self.history:
            res += [np.mean(self.history[key], axis=0)]
        return res

    def process_image(self, im, draw=True, ma=3):
        """Run the full pipeline on one BGR image.

        Args:
            im: input image (BGR, as returned by cv2.imread).
            draw: when True, annotate the pose onto a copy of the image.
            ma: moving-average window size; values <= 1 disable smoothing.

        Returns:
            (image, angles) — the (possibly annotated) image and the Euler
            angles, or (im, None) when no face is detected.
        """
        # Landmark detection works on grayscale
        im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        landmarks_2d, bbox = self.get_landmarks(im_gray)

        # If no face is detected, return the original image unchanged
        if landmarks_2d is None:
            return im, None

        # Head pose estimation
        t.tic('hp')
        rvec, tvec, cm, dc = self.get_headpose(im, landmarks_2d)
        if self.v:
            print(', hp: %.2f' % t.toc('hp'), end='ms')

        # Moving-average smoothing over the last *ma* frames
        if ma > 1:
            self.add_history([landmarks_2d, bbox, rvec, tvec, cm, dc])
            if self.get_history_len() > ma:
                self.pop_history()
            landmarks_2d, bbox, rvec, tvec, cm, dc = self.get_ma()

        t.tic('ga')
        angles = self.get_angles(rvec, tvec)
        if self.v:
            print(', ga: %.2f' % t.toc('ga'), end='ms')

        if draw:
            t.tic('draw')
            annotator = Annotator(im, angles, bbox, landmarks_2d, rvec, tvec, cm, dc, b=10.0)
            im = annotator.draw_all()
            if self.v:
                print(', draw: %.2f' % t.toc('draw'), end='ms' + ' ' * 10)

        return im, angles


def main(args):
    """Run head-pose estimation over every image in args['input_dir'].

    For each supported image, the annotated result is written to
    args['output_dir'] as '<name>_out.png'; other files are skipped.
    """
    in_dir = args['input_dir']
    out_dir = args['output_dir']

    # Initialize head pose detection
    hpd = HeadposeDetection(args['landmark_type'], args['landmark_predictor'])

    for filename in os.listdir(in_dir):
        name, ext = osp.splitext(filename)
        # Case-insensitive check: '.JPG'/'.PNG' were previously skipped.
        if ext.lower() in ('.jpg', '.png', '.gif'):
            print('> image:', filename, end='')
            # osp.join instead of string concatenation (robust to missing '/').
            image = cv2.imread(osp.join(in_dir, filename))
            res, angles = hpd.process_image(image)
            cv2.imwrite(osp.join(out_dir, name + '_out.png'), res)
        else:
            print('> skip:', filename, end='')
        print('')

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Original default was 'model/shape_predictor_68_face_landmarks.dat'.
    parser.add_argument('-i', metavar='DIR', dest='input_dir', default=r'9.png')
    parser.add_argument('-o', metavar='DIR', dest='output_dir', default='res/')
    parser.add_argument('-lt', metavar='N', dest='landmark_type', type=int, default=1, help='Landmark type.')
    parser.add_argument('-lp', metavar='FILE', dest='landmark_predictor',
                        default='shape_predictor_68_face_landmarks.dat', help='Landmark predictor data file.')
    args = vars(parser.parse_args())

    # makedirs(exist_ok=True) handles nested paths and avoids the
    # check-then-create race that os.path.exists + os.mkdir had.
    os.makedirs(args['output_dir'], exist_ok=True)
    # Normalize both paths to end with '/' (downstream code concatenates).
    if args['output_dir'][-1] != '/': args['output_dir'] += '/'
    if args['input_dir'][-1] != '/': args['input_dir'] += '/'
    main(args)

代码优化:

  • **计时器:**代码使用 Timer 类记录关键步骤的执行时间,方便分析算法性能。
  • **滑动窗口平均:**代码通过 history 字典记录多个帧的计算结果,并使用滑动窗口平均计算最终结果,平滑噪声,提升稳定性。

总结:

这段代码通过 dlib 和 OpenCV 库实现了人脸姿态估计功能,代码步骤清晰:计时器便于分析各阶段性能,滑动窗口平均则平滑了逐帧估计的抖动,提升了结果的稳定性。

人脸姿态估计代码解析:原理、步骤与优化

原文地址: https://www.cveoy.top/t/topic/nDTT 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录