目前,在网上大部分实现手势识别的算法,都是基于肤色检测和凸包检测。此算法虽然运算速度较快,但最大的弊端就是对手势背景要求较高(也就是说只要背景有跟皮肤类似的颜色出现,就很难将手势分割出来),抗干扰能力较差,且人体其他部位(如头部)对算法实现的影响也比较大。基于此,本文主要采取openpose中手部20个关键点检测,通过比较各个关键点之间的位置关系,来实现精度较高、抗干扰能力较强的手势识别算法。

openpose关键点检测效果如图:

手部20个关键点检测

其算法原理以识别“手势1”和“手势5”为例:

图1  手势1
图2  手势5

第一步:以每个手指第一个关节点(序号3、7、11、15、19)和手掌根部点(序号0)为边界,通过cv2.minEnclosingCircle()函数拟合一个检测圆(图上蓝色圆);

第二步:分别计算每个手指最外侧点(序号4、8、12、16、20,分别对应大拇指、食指、中指、无名指、小拇指的最外侧)与检测圆(图上蓝色圆)的距离;

第三步:根据5个距离的情况组合,来判断手势的类型。若只有8号检测点在圆圈的外部(表示食指是竖起来的),4号、12号、16号、20号检测点均在圆圈的内部(表示其余四个指头是握着的),则为“手势1”,图1右上角显示检测的结果“this is one”;若4、8、12、16、20号检测点均在圆圈外(表示5个手指全部伸直)则为“手势5”,图2右上角显示检测的结果“this is five”。

以此类推,根据其他检测点与圆圈的位置关系组合,除以上提到的2种手势外,还实现了8种手势,效果如下图所示。

手势2
手势4
手势8
手势“OK”

手势“比心”
手势“竖大拇指”
手势“握拳”
手势“竖中指”

准备工作(基于ROS机器人框架):

1.通过ROS框架启动摄像头。

2.下载手部检测点CAFFE库。

3.运行以下源代码实现检测和显示功能。

#!/usr/bin/env python
#-*- coding: utf-8 -*-
from __future__ import print_function


import sys
import rospy
import time
import cv2 
import math
from std_msgs.msg import String
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
import numpy as np
from sensor_msgs.msg import CompressedImage
from geometry_msgs.msg import Twist, Vector3
# from base import Event

# Caffe network definition file; must be an absolute path
protoFile = "/home/xtark/catkin_ws/src/xtark_tutorials/scripts/pose_deploy.prototxt"# must be an absolute path
# Pre-trained hand keypoint weights
weightsFile = "/home/xtark/catkin_ws/src/xtark_tutorials/scripts/pose_iter_102000.caffemodel"
# Number of network output maps: 21 hand keypoints + 1 background map
nPoints = 22
# Keypoint index pairs used to draw the hand-skeleton lines
POSE_PAIRS = [ [0,1],[1,2],[2,3],[3,4],[0,5],[5,6],[6,7],[7,8],[0,9],[9,10],[10,11],[11,12],[0,13],[13,14],[14,15],[15,16],[0,17],[17,18],[18,19],[19,20] ]
# Minimum confidence for a keypoint detection to be accepted
threshold = 0.2
# Load the Caffe model for inference
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
# Sentinel coordinates marking an undetected keypoint
flag=np.array([-1,-1])

class image_converter:
  """ROS node body: subscribes to camera frames, runs OpenPose hand keypoint
  detection, classifies the hand gesture from fingertip positions relative to
  a fitted palm circle, and republishes the annotated image."""

  def __init__(self):
    # Subscriber for raw camera frames
    self.image_sub = rospy.Subscriber("/usb_cam/image_raw",Image,self.callback)
    # Publisher for processed frames
    self.image_pub = rospy.Publisher("/image_topic_2",Image, queue_size =3)
    # Converter between ROS Image messages and OpenCV images
    self.bridge = CvBridge()

  def _detect_keypoints(self, output, frameWidth, frameHeight, frameCopy):
    """Extract one (x, y) pixel position per keypoint from the network output.

    Undetected keypoints (confidence <= threshold) are stored as the module
    sentinel `flag` (-1, -1). Detected points are also drawn on `frameCopy`.
    Returns a list of nPoints numpy arrays.
    """
    points = []
    for i in range(nPoints):
      # Upscale the probability map to frame resolution and take its maximum
      probMap = cv2.resize(output[0, i, :, :], (frameWidth, frameHeight))
      minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)
      if prob > threshold :
        cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 255), 2, lineType=cv2.LINE_AA)
        points.append(np.array([int(point[0]), int(point[1])]))
      else :
        points.append(flag)
    return points

  def _classify(self, distance_4, distance_8, distance_12, distance_16, distance_20, flag_distance):
    """Map the five signed fingertip-to-circle distances to a gesture label.

    A fingertip counts as extended when its distance exceeds flag_distance.
    """
    if distance_8 >= flag_distance and (np.array([distance_4,distance_12,distance_16,distance_20])<flag_distance).all():
      return "this is one"
    elif (np.array([distance_8,distance_12])>=flag_distance).all() and (np.array([distance_4,distance_16,distance_20])<flag_distance).all():
      return "this is two"
    elif (np.array([distance_8,distance_12,distance_16,distance_20])>=flag_distance).all() and distance_4<flag_distance:
      return "this is four"
    elif (np.array([distance_4,distance_8,distance_12,distance_16,distance_20])>=flag_distance).all():
      return "this is five"
    elif (np.array([distance_4,distance_8,distance_12,distance_16,distance_20])<=flag_distance).all():
      return "this is fist"
    elif (np.array([distance_4,distance_8])>=flag_distance).all() and (np.array([distance_12,distance_16,distance_20])<flag_distance).all():
      return "this is eight"
    elif distance_4>=flag_distance and (np.array([distance_8,distance_16,distance_20])<flag_distance).all():
      return "this is good"
    elif distance_12>=flag_distance and (np.array([distance_4,distance_8,distance_16,distance_20])<flag_distance).all():
      return "this is out"
    elif (np.array([distance_12,distance_16])>=flag_distance).all() and distance_4<flag_distance:
      return "this is OK"
    elif (np.array([distance_16,distance_20])>=flag_distance).all() and distance_4<flag_distance:
      return "this is love"
    else:
      return "can't find"

  def callback(self,data):
    """Process one camera frame end-to-end and publish the annotated result."""
    # Convert the ROS image message to an OpenCV BGR image
    try:
      frame = self.bridge.imgmsg_to_cv2(data, "bgr8")
    except CvBridgeError as e:
      print(e)
      # BUGFIX: bail out — `frame` is undefined past this point on failure
      return
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    aspect_ratio = frameWidth/frameHeight
    inHeight = 368
    # NOTE: ((x*8)//8) is mathematically int(x); kept for parity with the
    # original OpenPose sample code
    inWidth = int(((aspect_ratio*inHeight)*8)//8)
    t = time.time()
    frameCopy = np.copy(frame)
    # Pack the frame into a 4-D blob for the network
    inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(inpBlob)
    # Run inference; the model yields 22 maps (21 hand keypoints + background)
    output = net.forward()
    # Report per-frame inference time
    print("forward = {}".format(time.time() - t))

    points = self._detect_keypoints(output, frameWidth, frameHeight, frameCopy)

    # Boundary points for the palm circle: wrist (0) and each finger's first
    # joint (3, 7, 11, 15, 19), dropping undetected ones
    limit = np.concatenate(([points[0]],[points[3]],[points[7]],[points[11]],[points[15]],[points[19]]),axis=0)
    limit_points = np.array([p for p in limit if not (p == flag).all()])

    # BUGFIX: with no valid boundary points cv2.minEnclosingCircle would
    # raise; publish the raw frame and skip classification for this frame
    if len(limit_points) == 0:
      try:
        self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame, "bgr8"))
      except CvBridgeError as e:
        print(e)
      return

    # Fit the palm circle through the boundary points
    (x,y),radius = cv2.minEnclosingCircle(limit_points)
    center = (int(x),int(y))
    radius = int(radius)
    # A fingertip counts as "extended" once it is this far outside the circle
    flag_distance = radius * 0.1

    def tip_distance(idx):
      # Signed distance from fingertip `idx` to the circle edge (positive =
      # outside); 0 when the fingertip was not detected
      if (points[idx] != flag).all():
        return np.linalg.norm(points[idx]-center)-radius
      return 0
    distance_4 = tip_distance(4)
    distance_8 = tip_distance(8)
    distance_12 = tip_distance(12)
    distance_16 = tip_distance(16)
    distance_20 = tip_distance(20)

    # Draw the fitted circle for debugging
    cv2.circle(frameCopy,center,radius,(255,0,0),2)

    # Infer the gesture from which fingertips lie outside the circle
    result = self._classify(distance_4, distance_8, distance_12, distance_16, distance_20, flag_distance)

    # Draw the gesture label on the annotated copy
    cv2.putText(frameCopy, result, (430, 80), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2, lineType=cv2.LINE_AA)

    # Draw the hand skeleton on the frame that gets published
    for pair in POSE_PAIRS:
      partA = pair[0]
      partB = pair[1]
      # BUGFIX: skip pairs with an undetected endpoint — the original drew
      # them at the (-1, -1) sentinel (the intended guard was commented out
      # because truth-testing a numpy array is ambiguous)
      if (points[partA] == flag).all() or (points[partB] == flag).all():
        continue
      cv2.line(frame, tuple(points[partA]), tuple(points[partB]), (0, 255, 255), 2, lineType=cv2.LINE_AA)
      cv2.circle(frame, tuple(points[partA]), 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
      cv2.circle(frame, tuple(points[partB]), 5, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)

    # Publish the skeleton-annotated frame ("bgr8" = color; "8UC1" = grayscale)
    # NOTE(review): the gesture text and circle are drawn on frameCopy, which
    # is never published — presumably intentional; confirm with the author
    try:
        self.image_pub.publish(self.bridge.cv2_to_imgmsg(frame, "bgr8"))
    except CvBridgeError as e:
        print(e)



if __name__ == '__main__':
  # Register this process as a ROS node, then construct the converter so its
  # subscriber/publisher attach to the node.
  rospy.init_node('image_converter', anonymous=True)
  converter = image_converter()
  print('start')
  # Hand control to the ROS event loop until shutdown.
  try:
    rospy.spin()
  except rospy.ROSInterruptException:
    print('exception')

问题:

因本算法是基于树莓派读取摄像头信息,然后通过在win10中虚拟机(Linux)系统运行核心代码,因此全程没有GPU加速,检测速度比较慢(大约3秒钟检测一帧),所有没有实现实时检测功能(3秒钟卡一下有点难受)。要是有朋友能通过GPU加速运行此代码,那么实时检测的效果应该是比较理想的。