目的:识别手部在脸上的动作,比如:涂眼霜、涂水乳、敷面膜、没动作

参考链接:

https://github.com/xinghaochen/awesome-hand-pose-estimation

https://github.com/lmb-freiburg/hand3d

https://github.com/FORTH-ModelBasedTracker/MonocularRGB_3D_Handpose_WACV18

openpose:

方法1:svm
数据集:20-30人,3w+张图片
准确度:0.85(opencv的手部特征检测器不准)

训练技巧:
1.分别选取稳定的几个点代表整个手部,脸部也是如此
2.坏帧清洗,自动剔除没有检测到手或者图片模糊导致检测不准的帧
3.数据做归一化处理

特征:
1.计算手部中心点和脸部中心点的欧氏距离
2.计算指尖中心点到内眼角的欧氏距离
3.计算指尖与眼睛的y轴距离
4.计算手部和脸部所有关键点向量的欧氏距离
5.计算手部和脸部所有关键点向量的曼哈顿距离

数据处理:

1.把视频流按帧保存下来:

import sys
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils


def getImageVar(image):
    """Return a sharpness score for a BGR image.

    The image is converted to grayscale and the variance of its Laplacian
    (computed in 64-bit floats) is returned. The caller compares this value
    against a threshold to decide whether to keep the frame.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.Laplacian(gray, cv2.CV_64F).var()



video_path = 'data2/vedio/width/1.mp4'
save_path='data2/5/'
cap = cv2.VideoCapture(video_path)

# Read the first frame up front to learn the frame size. Fail with a clear
# message if the video cannot be read at all.
hasFrame, frame = cap.read()
if not hasFrame:
    cap.release()
    raise SystemExit("cannot read a frame from " + video_path)
frameWidth = frame.shape[1]
frameHeight = frame.shape[0]

# Walk the remaining frames; k counts the frames read inside the loop and is
# used in the saved file name.
k=0
while True:
    k += 1
    hasFrame, frame = cap.read()
    if not hasFrame:
        # End of stream: frame is None here, so stop before touching it.
        break

    # Only every 5th frame is a candidate, so skip the crop/resize otherwise.
    if k%5!=0:
        continue

    # Crop the region of interest, then shrink it to 90% of its size.
    frame=frame[0:480,150:530]
    h,w,_=frame.shape
    frame=cv2.resize(frame,(int(w*0.9),int(h*0.9)))

    # Keep the frame only when its sharpness score reaches 50.
    fm=getImageVar(frame)
    if fm>=50:
        print(k,fm)
        cv2.imwrite(save_path+'frame_%d.jpg' %k, frame)

cap.release()

2.清洗掉没有人脸的帧:

import matplotlib.pyplot as plt
import cv2
import numpy as np
import  dlib
import os

# 如果未检测到人脸,那么返回false,否则返回true
# Face detector used by geteye_rect() to decide whether a frame is kept.
face_detector = dlib.get_frontal_face_detector()
# Facial landmark model. The path uses "/" like the other scripts in these
# notes: the previous "class\shape_..." contained the invalid escape "\s" and
# only resolved on Windows.
landmark_predictor = dlib.shape_predictor("class/shape_predictor_68_face_landmarks.dat")

def geteye_rect(imgpath):
    """Delete the image at *imgpath* unless exactly one face is detected in it.

    Args:
        imgpath: path of the image file to check.

    Returns:
        bool: True when exactly one face was found and the file was kept;
        False when the image could not be read or the file was removed.
    """
    bgrImg = cv2.imread(imgpath)
    if bgrImg is None:
        return False

    # Second argument 1 is passed straight to the dlib detector call.
    facesrect = face_detector(bgrImg, 1)
    if len(facesrect) != 1:
        print(imgpath,"none.")
        os.remove(imgpath)
        return False

    return True
# Walk the frame directory and run the face check on every jpg file.
path = 'data2/5/'
n = 0
for root, dirs, files in os.walk(path):
    for file in files:
        if not file.endswith('jpg'):
            continue
        n += 1
        file_name = root + '/' + file
        print(n, file_name)
        geteye_rect(file_name)

提取特征:

3.坐标点检测(人脸/手部关键点)

import sys
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils

# Face detector shared by face_detect().
detector = dlib.get_frontal_face_detector()
# Facial landmark predictor loaded from the local model file (68 landmarks,
# going by the file name).
predictor = dlib.shape_predictor('class/shape_predictor_68_face_landmarks.dat')

def face_detect(pic):
    """Detect the single face in *pic*, log its landmarks and draw them.

    One line is appended to the module-level file ``f1``: every landmark as
    ``x,y`` followed by two spaces, then a newline.

    Args:
        pic: image array, as returned by ``cv2.imread``.

    Returns:
        A copy of *pic* with each landmark drawn as a dot and labelled with
        its index.

    Raises:
        SystemExit: with status 1 when the number of detected faces is not
            exactly one. The run is aborted rather than skipped so the face
            and hand point files stay line-aligned.
    """
    img = np.copy(pic)

    # Number of detected faces must be exactly one.
    rects = detector(img, 1)
    if len(rects)!=1:
        print(len(rects),"face detection fail!")
        # sys.exit replaces the interactive-only exit() helper and reports
        # the failure through a non-zero status.
        sys.exit(1)

    # Plain Python ints: safe to format and to pass to the OpenCV drawing calls.
    coords = [(int(p.x), int(p.y)) for p in predictor(img, rects[0]).parts()]

    f1.write("".join(str(x)+","+str(y)+"  " for x, y in coords) + "\n")

    # Draw each landmark and its index.
    for idx, (x, y) in enumerate(coords):
        cv2.circle(img, (x,y), 2, (0, 255, 0), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(img, str(idx), (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1,cv2.LINE_AA)

    return img

# Loaded networks keyed by (prototxt, weights) so each model is read only once.
_HAND_NET_CACHE = {}


def _load_hand_net(proto_file, weights_file):
    """Return the Caffe network for the given files, loading it at most once."""
    key = (proto_file, weights_file)
    if key not in _HAND_NET_CACHE:
        _HAND_NET_CACHE[key] = cv2.dnn.readNetFromCaffe(proto_file, weights_file)
    return _HAND_NET_CACHE[key]


def hand_detect(pic, label, img_face, file_name):
    """Locate hand keypoints in *pic*, log them and draw them on *img_face*.

    For each of the 22 network output channels the confidence map is resized
    to the image size and the position of its maximum is taken as the
    keypoint. A channel whose peak confidence is below 0.1 is logged as
    ``0,0`` and not drawn.

    One line is appended to the module-level file ``f2``: the 22 ``x,y``
    fields, the numeric class id ('a' -> 0, 'b' -> 1, 'c' -> 2, anything else
    -> 9) and *file_name*, each followed by two spaces, then a newline.

    Args:
        pic: image array the network runs on.
        label: class name of the image (its directory name in the caller).
        img_face: image to draw the hand keypoints on (copied first).
        file_name: path of the source image, written to the log line.

    Returns:
        A copy of *img_face* with the confident keypoints drawn.
    """
    protoFile = "class/hand/pose_deploy.prototxt"
    weightsFile = "class/hand/pose_iter_102000.caffemodel"
    nPoints = 22
    threshold = 0.1
    label_ids = {'a': 0, 'b': 1, 'c': 2}

    # Reuse the network across calls instead of re-reading the model per image.
    net = _load_hand_net(protoFile, weightsFile)

    img_hand=np.copy(img_face)   # canvas for the hand keypoints
    frame = np.copy(pic)         # input to the hand network
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight

    t = time.time()
    # Network input size: fixed height, width follows the aspect ratio.
    # NOTE(review): the "* 8 // 8" cancels out; kept unchanged so the network
    # input matches what existing data was produced with.
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)

    net.setInput(inpBlob)
    output = net.forward()
    print("time taken by network : {:.3f}".format(time.time() - t))

    fields = []
    for i in range(nPoints):
        # Confidence map of keypoint i, scaled to the image size.
        probMap = cv2.resize(output[0, i, :, :], (frameWidth, frameHeight))
        # minMaxLoc returns (min, max, min location, max location).
        _, prob, _, point = cv2.minMaxLoc(probMap)

        if prob >= threshold:
            x, y = int(point[0]), int(point[1])
            cv2.circle(img_hand, (x,y), 2, (255, 255, 0), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(img_hand, str(i), (x,y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), 1,cv2.LINE_AA)
        else:
            # Low-confidence keypoints are recorded as the (0, 0) sentinel.
            x, y = 0, 0
        fields.append(str(x)+","+str(y)+"  ")

    gg = label_ids.get(label, 9)
    f2.write("".join(fields) + str(gg)+"  "+file_name+"  \n")

    return img_hand


path = "data2/val2/"
save_path = "data2/0/"

# f1 / f2 are the module-level files face_detect() and hand_detect() write to.
# The with-block closes and flushes both even when a detection step aborts.
with open("data2/val2/face_point.txt","w+") as f1, open("data2/val2/hand_point.txt","w+") as f2:
    n=0
    for root,dirs,files in os.walk(path):
        for file in files:
            if not file.endswith('jpg'):
                continue
            file_name=root+"/"+file
            # The class label is the name of the directory holding the image.
            label=root.split('/')[-1]
            n+=1
            print(n,file_name,label)

            # Extract the keypoints (face first, then hand on the same canvas).
            pic=cv2.imread(file_name)
            img_face=face_detect(pic)
            img_hand=hand_detect(pic, label, img_face, file_name)

            # cv2.imwrite does not create directories and fails silently when
            # the target directory is missing, so create it first.
            os.makedirs(save_path+label, exist_ok=True)
            cv2.imwrite(save_path+label+"/"+file,img_hand)

4.计算距离特征

import glob
import os
import numpy as np
from numpy import *
import cv2


# 4.计算特征
def feature(data_hand, label_hand, data_face, max_num):
    """Compute the five hand-to-eye distance features for every sample.

    Samples with fewer than three detected fingertip keypoints (8, 12, 16)
    are skipped. For each remaining sample one line
    ``f1  f2  f3  f4  f5  label`` is written to the module-level file ``f3``.

    Features, measured against the eye on the same side as the hand:
        1. Euclidean distance between fingertip centre and eye centre.
        2. Euclidean distance between fingertip centre and inner eye corner.
        3. Vertical (y) distance between fingertip centre and eye centre.
        4. Euclidean distance between the fingertip and eye keypoint vectors.
        5. Manhattan distance between the fingertip and eye keypoint vectors.

    Args:
        data_hand: hand keypoints indexed as [sample][keypoint][x or y].
        label_hand: per-sample label strings.
        data_face: face landmarks, indexed like *data_hand*.
        max_num: per-feature maxima used for normalisation. Normalisation is
            applied only when all five values are supplied.

    Returns:
        list of ``(f1, f2, f3, f4, f5)`` tuples, one per kept sample.
    """
    rows = []

    key_point_hand1 = [8, 12, 16]          # fingertips used for the centre
    left_eye = [37, 38, 40, 41]            # landmarks averaged into the eye centre
    right_eye = [43, 44, 46, 47]
    vector_point_hand = [8, 12, 16]        # keypoints for the vector distances
    vector_left_eye = [37, 38, 40]
    vector_right_eye = [43, 44, 46]

    # All five maxima are needed; the previous ">= 3" check indexed past the
    # end of a 3- or 4-element list.
    normalize = len(max_num) >= 5

    for i in range(data_hand.shape[0]):
        label = label_hand[i]

        # Centre of the detected fingertips and how many were detected.
        hand_point, mask = center_point(i, data_hand, key_point_hand1)
        if mask <= 2:
            # Not all three fingertips were found: treat as "no hand".
            continue

        # np.asarray instead of the star-imported mat(), which NumPy 2 removed.
        vector_hand = np.asarray(vector(i, data_hand, vector_point_hand))

        # Pick the eye on the hand's side: compare the x distance from the
        # fingertip centre to the two outer eye corners (landmarks 36 and 45).
        if abs(hand_point[0]-data_face[i][36][0]) <= abs(hand_point[0]-data_face[i][45][0]):
            eye_point, _ = center_point(i, data_face, left_eye)
            inner_eye_point = data_face[i][39]
            vector_eye = np.asarray(vector(i, data_face, vector_left_eye))
        else:
            eye_point, _ = center_point(i, data_face, right_eye)
            inner_eye_point = data_face[i][42]
            vector_eye = np.asarray(vector(i, data_face, vector_right_eye))

        featur1 = int(np.sqrt(np.sum(np.square(hand_point-eye_point))))
        featur2 = int(np.sqrt(np.sum(np.square(hand_point-inner_eye_point))))
        featur3 = int(abs(hand_point[1]-eye_point[1]))
        featur4 = int(np.sqrt(np.sum(np.square(vector_hand-vector_eye))))
        # Manhattan distance between the keypoint vectors (previously computed
        # on the two centre points, contradicting the feature's definition).
        featur5 = int(np.sum(np.abs(vector_hand-vector_eye)))

        if normalize:
            featur1 = featur1/max_num[0]
            featur2 = featur2/max_num[1]
            featur3 = featur3/max_num[2]
            featur4 = featur4/max_num[3]
            featur5 = featur5/max_num[4]

        f3.write(str(featur1)+"  "+str(featur2)+"  "+str(featur3)+"  "+str(featur4)+"  "+str(featur5)+"  "+label+"\n")
        rows.append((featur1,featur2,featur3,featur4,featur5))

    return rows


# 3.坐标向量
def vector(i, data,key_point):
    """Return the (x, y) pairs of sample *i* at the indices in *key_point*.

    The pairs are returned as a list of tuples, in the order the indices
    are given.
    """
    return [(data[i][idx][0], data[i][idx][1]) for idx in key_point]

# 2.计算中心坐标点
def center_point(i, data, key_point):
    """Average the selected keypoints of sample *i*.

    Keypoints whose x coordinate is 0 are left out of the average. Each
    averaged coordinate is truncated to an int.

    Returns:
        tuple: ``(centre, count)`` where *centre* is a 2-element numpy array
        (``[0, 0]`` when no keypoint was counted) and *count* is the number
        of keypoints that went into the average.
    """
    total_x = 0
    total_y = 0
    count = 0
    for idx in key_point:
        px, py = data[i][idx][0], data[i][idx][1]
        if px == 0:
            continue
        total_x += px
        total_y += py
        count += 1

    if count:
        centre = (int(total_x / count), int(total_y / count))
    else:
        centre = (0, 0)

    return np.array(centre), count
    
# 1.读取坐标点数据
def read_data(file_name):
    """Read a keypoint file written by the extraction step.

    Every line holds ``x,y`` fields separated by two spaces. When the first
    line splits into 69 fields, 68 coordinate pairs are read per line;
    otherwise 22 pairs are read. Blank lines are ignored.

    Args:
        file_name: path of the text file to read.

    Returns:
        tuple: ``(data, label)``. *data* is an integer array indexed as
        [line][keypoint][x or y]. *label* is an array holding the field at
        index 22 of every line; in 22-point files that is the class id, in
        68-point files it is just another coordinate field. Both arrays are
        empty when the file has no data lines.
    """
    # The with-block closes the file; the handle used to be leaked.
    with open(file_name, "r") as fh:
        lines = [ln for ln in fh if ln.strip()]

    if not lines:
        # Nothing to parse; previously this raised IndexError on lines[0].
        return np.array([]), np.array([])

    # The field count of the first line tells the two file layouts apart.
    point_num = len(lines[0].split('  '))
    coords_per_line = 68 if point_num == 69 else 22

    data = []    # keypoints of every line
    label = []
    for raw in lines:
        fields = raw.split('  ')
        point = []
        for k in range(coords_per_line):
            parts = fields[k].split(",")
            point.append([int(parts[0]), int(parts[1])])
        data.append(point)
        label.append(fields[22])

    return np.array(data), np.array(label)



# 获取手部数据、标签
# Hand keypoints and labels.
f1="data3/train/hand_point.txt"
data_hand,label_hand=read_data(f1)

# Face landmarks (the label column of this file is not used).
f2="data3/train/face_point.txt"
data_face,_=read_data(f2)
print(data_hand.shape)

# Compute the features. feature() writes each row to the module-level f3, and
# the with-block makes sure the file is flushed and closed afterwards.
max_num=[]   # per-feature maxima for normalisation; empty disables it
with open("data3/train/feature.txt","w+") as f3:
    feature=feature(data_hand, label_hand, data_face, max_num)

5.使用svm进行训练

from numpy import *
import numpy as np
import cv2
import matplotlib.pyplot as plt
import shutil

def loadDataSet(fileName, num_features=3, label_index=5):
    """Load feature rows and integer labels from a feature text file.

    Each line holds fields separated by two spaces. Blank lines are skipped.

    Args:
        fileName: path of the feature file.
        num_features: how many leading fields of each line are used as
            features (default 3).
        label_index: index of the field holding the integer label
            (default 5).

    Returns:
        tuple: ``(dataMat, labelMat)`` where *dataMat* is a list of float
        lists and *labelMat* is a list of one-element int lists.
    """
    dataMat = []
    labelMat = []
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # A trailing or stray blank line used to crash on float('').
                continue
            lineArr = stripped.split('  ')
            dataMat.append([float(lineArr[k]) for k in range(num_features)])
            labelMat.append([int(lineArr[label_index])])
    return dataMat, labelMat

#加载训练集
# Load the training set. OpenCV expects float32 samples and, for
# classification, int32 responses; the platform default int is int64 on most
# systems, which OpenCV rejects, so both dtypes are set explicitly.
train_data,train_label = loadDataSet('data3/train/feature-shuffle.txt')
train_data=np.array(train_data, dtype='float32')
train_label=np.array(train_label, dtype='int32')
print(train_label.shape)

# Load the test set the same way.
test_data,test_label = loadDataSet('data3/val2/feature.txt')
test_data=np.array(test_data, dtype='float32')
test_label=np.array(test_label, dtype='int32')
print(test_label.shape)


# Build the classifier: C-SVC with a linear kernel.
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)
svm.setC(1e-5)

# Train and save the model.
ret = svm.train(train_data, cv2.ml.ROW_SAMPLE, train_label)
svm.save('data2/train/hand_class.xml')

# Support vectors of the trained model.
vec = svm.getSupportVectors()
print("最终结果:",vec)


# Predict on the test set.
(ret, res) = svm.predict(test_data)

# Accuracy: share of test rows whose prediction equals the label.
lens=len(test_data)
n=int(np.count_nonzero(res.ravel()==test_label.ravel()))
Accuracy=n/lens if lens else 0.0   # avoid dividing by zero on an empty set
print("准确度为:",Accuracy)

合并测试:提取特征+svm预测

import sys
import os
import dlib
import cv2
from PIL import Image
import numpy as np
import time
import imutils
from numpy import *


# 4.计算特征
def feature(data_hand, data_face, max_num):
    """Compute the five hand-to-eye distance features for one image.

    Features, measured against the eye on the same side as the hand:
        1. Euclidean distance between fingertip centre and eye centre.
        2. Euclidean distance between fingertip centre and inner eye corner.
        3. Vertical (y) distance between fingertip centre and eye centre.
        4. Euclidean distance between the fingertip and eye keypoint vectors.
        5. Manhattan distance between the fingertip and eye keypoint vectors.

    Args:
        data_hand: hand keypoints indexed as [keypoint][x or y].
        data_face: face landmarks, indexed like *data_hand*.
        max_num: per-feature maxima used for normalisation. Normalisation is
            applied only when all five values are supplied.

    Returns:
        list holding a single ``(f1, f2, f3, f4, f5)`` tuple.

    Raises:
        SystemExit: with status 1 when at most one fingertip keypoint
            (8, 12, 16) was detected.
    """
    # Fingertip keypoints used for the centre.
    key_point_hand=[8,12,16]

    # Centre of the detected fingertips and how many were detected.
    hand_point,mask=center_point(data_hand, key_point_hand)

    # NOTE(review): the training-time feature() skips samples with mask <= 2;
    # this check uses mask <= 1 - confirm which threshold is intended.
    if mask<=1:
        print("action_Z:no hand!","mack:",mask)
        # sys.exit replaces the interactive-only exit() helper and reports
        # the failure through a non-zero status.
        sys.exit(1)

    # Landmarks averaged into the eye centre.
    left_eye=[37,38,40,41]
    right_eye=[43,44,46,47]

    # Keypoints used for the vector distances (fingertips and eye).
    vector_point_hand=[8,12,16,20]
    vector_left_eye=[37,38,40,41]
    vector_right_eye=[43,44,46,47]

    # np.asarray / np.sum instead of the star-imported mat() and sum():
    # mat was removed in NumPy 2, and the built-in sum would not reduce a 2-D
    # array to a scalar.
    vector_hand=np.asarray(vector(data_hand,vector_point_hand))

    # Pick the eye on the hand's side: compare the x distance from the
    # fingertip centre to the two outer eye corners (landmarks 36 and 45).
    if abs(hand_point[0]-data_face[36][0]) <= abs(hand_point[0]-data_face[45][0]):
        eye_point,_=center_point(data_face, left_eye)
        inner_eye_point=data_face[39]
        vector_eye=np.asarray(vector(data_face,vector_left_eye))
    else:
        eye_point,_=center_point(data_face, right_eye)
        inner_eye_point=data_face[42]
        vector_eye=np.asarray(vector(data_face,vector_right_eye))

    featur1=int(np.sqrt(np.sum(np.square(hand_point-eye_point))))
    featur2=int(np.sqrt(np.sum(np.square(hand_point-inner_eye_point))))
    featur3=int(abs(hand_point[1]-eye_point[1]))
    featur4=int(np.sqrt(np.sum(np.square(vector_hand-vector_eye))))
    featur5=int(np.sum(np.abs(vector_hand-vector_eye)))

    # All five maxima are needed; the previous ">= 3" check indexed past the
    # end of a 3- or 4-element list.
    if len(max_num)>=5:
        featur1=featur1/max_num[0]
        featur2=featur2/max_num[1]
        featur3=featur3/max_num[2]
        featur4=featur4/max_num[3]
        featur5=featur5/max_num[4]

    return [(featur1,featur2,featur3,featur4,featur5)]


# 3.坐标向量
def vector(data,key_point):
    """Return the (x, y) pairs of *data* at the indices in *key_point*.

    The pairs are returned as a list of tuples, in the order the indices
    are given.
    """
    return [(data[idx][0], data[idx][1]) for idx in key_point]


# 2.计算中心坐标点
def center_point(data, key_point):
    """Average the keypoints of *data* selected by *key_point*.

    Keypoints whose x coordinate is 0 are left out of the average. Each
    averaged coordinate is truncated to an int.

    Returns:
        tuple: ``(centre, count)`` where *centre* is a 2-element numpy array
        (``[0, 0]`` when no keypoint was counted) and *count* is the number
        of keypoints that went into the average.
    """
    total_x = 0
    total_y = 0
    count = 0
    for idx in key_point:
        px, py = data[idx][0], data[idx][1]
        if px == 0:
            continue
        total_x += px
        total_y += py
        count += 1

    if count:
        centre = (int(total_x / count), int(total_y / count))
    else:
        centre = (0, 0)

    return np.array(centre), count


# dlib detector and landmark predictor, created on first use and then reused.
_FACE_MODELS = {}


def _get_face_models():
    """Return ``(detector, predictor)``, loading the landmark model only once."""
    if not _FACE_MODELS:
        _FACE_MODELS["detector"] = dlib.get_frontal_face_detector()
        _FACE_MODELS["predictor"] = dlib.shape_predictor('class/shape_predictor_68_face_landmarks.dat')
    return _FACE_MODELS["detector"], _FACE_MODELS["predictor"]


def face_detect(pic):
    """Detect the single face in *pic* and return its landmarks.

    Args:
        pic: image array, as returned by ``cv2.imread``.

    Returns:
        tuple: ``(img, points)`` where *img* is a copy of *pic* and *points*
        is an integer array with one ``[x, y]`` row per landmark.

    Raises:
        SystemExit: with status 1 when the number of detected faces is not
            exactly one.
    """
    # Reuse the models across calls instead of reloading them for every image.
    detector, predictor = _get_face_models()
    img = np.copy(pic)

    # Number of detected faces must be exactly one.
    rects = detector(img, 1)
    if len(rects)!=1:
        print("face detection fail!")
        # sys.exit replaces the interactive-only exit() helper and reports
        # the failure through a non-zero status.
        sys.exit(1)

    points = [[p.x, p.y] for p in predictor(img,rects[0]).parts()]

    return img, np.array(points)


# Loaded networks keyed by (prototxt, weights) so each model is read only once.
_HAND_NET_CACHE = {}


def _load_hand_net(proto_file, weights_file):
    """Return the Caffe network for the given files, loading it at most once."""
    key = (proto_file, weights_file)
    if key not in _HAND_NET_CACHE:
        _HAND_NET_CACHE[key] = cv2.dnn.readNetFromCaffe(proto_file, weights_file)
    return _HAND_NET_CACHE[key]


def hand_detect(pic, label, img_face):
    """Locate hand keypoints in *pic*.

    For each of the 22 network output channels the confidence map is resized
    to the image size and the position of its maximum is taken as the
    keypoint. A channel whose peak confidence is below 0.1 is reported as
    ``[0, 0]``, the same sentinel the training data uses for undetected
    keypoints, so that center_point() and the "no hand" check in feature()
    see missing keypoints the way they did at training time.

    Args:
        pic: image array the network runs on.
        label: unused; kept for interface compatibility.
        img_face: image that is copied and returned alongside the keypoints.

    Returns:
        tuple: ``(img_hand, points)`` where *img_hand* is a copy of
        *img_face* and *points* is an integer array with one ``[x, y]`` row
        per output channel.
    """
    protoFile = "class/hand/pose_deploy.prototxt"
    weightsFile = "class/hand/pose_iter_102000.caffemodel"
    nPoints = 22
    threshold = 0.1

    # Reuse the network across calls instead of re-reading the model per image.
    net = _load_hand_net(protoFile, weightsFile)

    img_hand=np.copy(img_face)   # returned canvas
    frame = np.copy(pic)         # input to the hand network
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth / frameHeight

    t = time.time()
    # Network input size: fixed height, width follows the aspect ratio.
    # NOTE(review): the "* 8 // 8" cancels out; kept unchanged so the network
    # input matches what the training data was produced with.
    inHeight = 368
    inWidth = int(((aspect_ratio * inHeight) * 8) // 8)
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)

    net.setInput(inpBlob)
    output = net.forward()
    print("time taken by network : {:.3f}".format(time.time() - t))

    points = []
    for i in range(nPoints):
        # Confidence map of keypoint i, scaled to the image size.
        probMap = cv2.resize(output[0, i, :, :], (frameWidth, frameHeight))
        # minMaxLoc returns (min, max, min location, max location).
        _, prob, _, point = cv2.minMaxLoc(probMap)

        if prob >= threshold:
            points.append([int(point[0]), int(point[1])])
        else:
            # Previously the threshold was defined but never applied.
            points.append([0, 0])

    return img_hand,np.array(points)


path = "data/test2/"
save_path = "data/0/"
svm_model="data/train/hand_class.xml"

# Load the trained model once. The old code configured a fresh SVM and then
# replaced it with the loaded model for every image.
svm = cv2.ml.SVM_load(svm_model)
# Number of feature columns the model was trained on. The training script
# fits on the first three features only, while feature() returns five.
var_count = svm.getVarCount()

for root,dirs,files in os.walk(path):
    for file in files:
        if not file.endswith('jpg'):
            continue
        file_name=root+"/"+file
        # The class label is the name of the directory holding the image.
        label=root.split('/')[-1]

        # Extract the keypoints.
        pic=cv2.imread(file_name)
        img_face, point_face=face_detect(pic)
        img_hand, point_hand=hand_detect(pic, label, img_face)

        # Compute the features as a 1 x 5 float32 row.
        max_num=[]   # per-feature maxima for normalisation; empty disables it
        features=np.array(feature(point_hand, point_face, max_num), dtype='float32')
        print("特征:",features)

        # Pass only the leading columns the model expects.
        sample=np.ascontiguousarray(features[:, :var_count])
        (ret, res) = svm.predict(sample)
        print("预测结果:",res)