Mediapipe Gesture Recognition

Mediapipe implements gesture recognition (rock paper scissors)


Environment: python3.8, pycharm2020
Hardware: Logitech c505e

In the previous article we extracted the hand key-point information; by processing that information we can easily perform gesture recognition. (The complete code is at the end of the article.)

First, take a look at the results:
insert image description here insert image description here

Wrapping the functions in a class (skippable)

To make the code easier to call, first take the key-point detection functions from the previous article and wrap them in a class:

import cv2
import mediapipe as mp
import time
import math


class handDetctor():
    """Thin wrapper around MediaPipe Hands that detects hands and lists landmarks.

    Args:
        mode: Forwarded to MediaPipe as ``static_image_mode``.
        maxHands: Forwarded as ``max_num_hands``.
        detectionCon: Forwarded as ``min_detection_confidence``.
        trackCon: Forwarded as ``min_tracking_confidence``.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        # Result of the most recent findHands() call; None until it has run.
        self.results = None

        self.mpHands = mp.solutions.hands
        # Pass by keyword: newer MediaPipe releases insert ``model_complexity``
        # as the third positional parameter, so positional confidences would
        # be bound to the wrong arguments.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR frame and store the result on ``self.results``.

        Args:
            img: BGR image (as returned by ``cv2.VideoCapture.read``).
            draw: When true, draw every detected hand's landmarks and
                connections onto ``img`` in place.

        Returns:
            The same ``img`` object, possibly annotated.
        """
        # Convert BGR to RGB before handing the frame to MediaPipe.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)

        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return pixel coordinates of one detected hand's landmarks.

        Uses the result stored by the last ``findHands`` call.

        Args:
            img: Image whose height/width scale the normalized landmarks;
                also the drawing target when ``draw`` is true.
            handNo: Index of the hand within the detection result.
            draw: When true, write each landmark's index next to it on ``img``.

        Returns:
            A list of ``[index, x, y]`` entries, or an empty list when no
            detection has run yet, no hand was found, or ``handNo`` does not
            refer to a detected hand.
        """
        lmList = []
        hands = self.results.multi_hand_landmarks if self.results is not None else None
        if not hands:
            return lmList
        try:
            myHand = hands[handNo]
        except IndexError:
            # Fewer hands detected than requested: report "nothing found".
            return lmList

        # The frame size is constant for the whole hand, so read it once.
        h, w = img.shape[:2]
        for idx, lm in enumerate(myHand.landmark):
            # Landmark coordinates are normalized; scale them to pixels.
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.putText(img, str(int(idx)), (cx+10, cy+10), cv2.FONT_HERSHEY_PLAIN,
                            1, (0, 0, 255), 2)

        return lmList

# Usage example
def main():
    """Show the webcam feed with hand landmarks and an FPS counter until Esc is pressed."""
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    # Timestamp of the previous frame, used for the FPS counter.
    pTime = 0
    detector = handDetctor()
    try:
        while True:
            success, img = cap.read()
            if not success:
                # No frame available (camera missing or disconnected); img
                # would be None and crash the colour conversion.
                print("Failed to read a frame from the camera.")
                break

            img = detector.findHands(img)
            lmList = detector.findPosition(img, draw=False)
            if lmList:
                print(lmList)

            # Frame-rate display; guard against two frames sharing a timestamp.
            cTime = time.time()
            elapsed = cTime - pTime
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime
            cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

            cv2.imshow("image", img)
            # 27 is the Esc key code.
            if cv2.waitKey(2) & 0xFF == 27:
                break
    finally:
        # Always free the camera and close the preview window, even on error.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

Gesture judgment

With MediaPipe we can already obtain the coordinates of the hand key points. To recognize a gesture, we only need to determine whether each finger is open or closed. Here is the key-point layout for reference:
insert image description here

The following is an example of judging the opening and closing of the index finger:

When the index finger is straight, the distance from point 8 to point 0 is clearly larger than the distance from point 6 to point 0.
When the index finger is curled in, the opposite is true.

Judge each finger:

def fingerStatus(self, lmList):
    """Report, thumb first, whether each of the five fingers is extended.

    A finger counts as open when its tip landmark lies strictly farther from
    landmark 0 than the finger's lower landmark does.

    Args:
        lmList: ``[index, x, y]`` entries for landmarks 0 through 20.

    Returns:
        Five booleans, one per finger, ``True`` meaning open.
    """
    _, origin_x, origin_y = lmList[0]

    def reach(index):
        # Straight-line distance from landmark 0 to the given landmark.
        _, px, py = lmList[index]
        return math.hypot(px - origin_x, py - origin_y)

    # (lower landmark, tip landmark) for thumb, index, middle, ring, little finger.
    joint_tip_pairs = ((2, 4), (6, 8), (10, 12), (14, 16), (18, 20))
    return [reach(tip) > reach(joint) for joint, tip in joint_tip_pairs]

Usage: thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = detector.fingerStatus(lmList). Note that the coordinates of all 21 landmark points must be obtained first, so only call it when lmList is not empty.

full code

HandTrackingModule.py

import cv2
import mediapipe as mp
import time
import math

class handDetctor():
    """Thin wrapper around MediaPipe Hands: detection, landmark lookup, finger state.

    Args:
        mode: Forwarded to MediaPipe as ``static_image_mode``.
        maxHands: Forwarded as ``max_num_hands``.
        detectionCon: Forwarded as ``min_detection_confidence``.
        trackCon: Forwarded as ``min_tracking_confidence``.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        # Result of the most recent findHands() call; None until it has run.
        self.results = None

        self.mpHands = mp.solutions.hands
        # Pass by keyword: newer MediaPipe releases insert ``model_complexity``
        # as the third positional parameter, so positional confidences would
        # be bound to the wrong arguments.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        """Run hand detection on a BGR frame and store the result on ``self.results``.

        Args:
            img: BGR image (as returned by ``cv2.VideoCapture.read``).
            draw: When true, draw every detected hand's landmarks and
                connections onto ``img`` in place.

        Returns:
            The same ``img`` object, possibly annotated.
        """
        # Convert BGR to RGB before handing the frame to MediaPipe.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)

        if draw and self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)

        return img

    def findPosition(self, img, handNo=0, draw=True):
        """Return pixel coordinates of one detected hand's landmarks.

        Uses the result stored by the last ``findHands`` call.

        Args:
            img: Image whose height/width scale the normalized landmarks;
                also the drawing target when ``draw`` is true.
            handNo: Index of the hand within the detection result.
            draw: When true, write each landmark's index next to it on ``img``.

        Returns:
            A list of ``[index, x, y]`` entries, or an empty list when no
            detection has run yet, no hand was found, or ``handNo`` does not
            refer to a detected hand.
        """
        lmList = []
        hands = self.results.multi_hand_landmarks if self.results is not None else None
        if not hands:
            return lmList
        try:
            myHand = hands[handNo]
        except IndexError:
            # Fewer hands detected than requested: report "nothing found".
            return lmList

        # The frame size is constant for the whole hand, so read it once.
        h, w = img.shape[:2]
        for idx, lm in enumerate(myHand.landmark):
            # Landmark coordinates are normalized; scale them to pixels.
            cx, cy = int(lm.x * w), int(lm.y * h)
            lmList.append([idx, cx, cy])
            if draw:
                cv2.putText(img, str(int(idx)), (cx+10, cy+10), cv2.FONT_HERSHEY_PLAIN,
                            1, (0, 0, 255), 2)

        return lmList

    def fingerStatus(self, lmList):
        """Return the open/closed state of each finger, thumb first.

        A finger is reported open when its tip landmark is strictly farther
        from landmark 0 than the finger's lower landmark is.

        Args:
            lmList: ``[index, x, y]`` entries for landmarks 0 through 20, as
                produced by ``findPosition``. Must not be empty.

        Returns:
            A list of five booleans, ``True`` meaning the finger is open.

        Raises:
            IndexError: If ``lmList`` does not contain all 21 landmarks.
        """
        _, originx, originy = lmList[0]

        def distance_to_origin(index):
            # Straight-line pixel distance from landmark 0 to this landmark.
            _, x, y = lmList[index]
            return math.hypot(x - originx, y - originy)

        # (lower landmark, tip landmark) for thumb, index, middle, ring, little finger.
        keypoint_list = [[2, 4], [6, 8], [10, 12], [14, 16], [18, 20]]
        return [distance_to_origin(tip) > distance_to_origin(base)
                for base, tip in keypoint_list]

def main():
    """Show the webcam feed and print per-finger open/closed states until Esc is pressed."""
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    # Timestamp of the previous frame, used for the FPS counter.
    pTime = 0
    detector = handDetctor()
    try:
        while True:
            success, img = cap.read()
            if not success:
                # No frame available (camera missing or disconnected); img
                # would be None and crash the colour conversion.
                print("Failed to read a frame from the camera.")
                break

            img = detector.findHands(img)
            lmList = detector.findPosition(img, draw=False)
            if lmList:
                print(detector.fingerStatus(lmList))

            # Frame-rate display; guard against two frames sharing a timestamp.
            cTime = time.time()
            elapsed = cTime - pTime
            fps = 1 / elapsed if elapsed > 0 else 0
            pTime = cTime
            cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

            cv2.imshow("image", img)
            # 27 is the Esc key code.
            if cv2.waitKey(2) & 0xFF == 27:
                break
    finally:
        # Always free the camera and close the preview window, even on error.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

gestureRecognition.py

import time
import cv2
import os
import HandTrackingModule as htm
wCam, hCam = 640, 480
# Side length of the square region in the top-left corner that shows the overlay.
overlaySize = 200

# Preload the overlay pictures before opening the camera.
picture_path = "gesture_picture"
# os.listdir returns entries in arbitrary order; sort so that the indices used
# below refer to the same picture on every run and every platform.
myList = sorted(os.listdir(picture_path))
print(myList)
overlayList = []
for imPath in myList:
    image = cv2.imread(f'{picture_path}/{imPath}')
    if image is None:
        # Not a readable image (imread signals failure by returning None).
        continue
    # Force the overlay to the exact size of the region it is pasted into.
    overlayList.append(cv2.resize(image, (overlaySize, overlaySize)))
if len(overlayList) < 3:
    raise RuntimeError(
        f"Expected at least 3 readable images in '{picture_path}', "
        f"found {len(overlayList)}"
    )

cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, wCam)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, hCam)

detector = htm.handDetctor(detectionCon=0.7)

try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame available (camera missing or disconnected).
            print("Failed to read a frame from the camera.")
            break

        img = detector.findHands(img)
        lmList = detector.findPosition(img, draw=False)
        if lmList:
            thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = detector.fingerStatus(lmList)
            # The thumb is ignored; the three patterns are mutually exclusive.
            if not firstOpen and not secondOpen and not thirdOpen and not fourthOpen:
                # All four fingers closed (fist).
                img[0:overlaySize, 0:overlaySize] = overlayList[1]
            elif firstOpen and secondOpen and not thirdOpen and not fourthOpen:
                # Only index and middle fingers open.
                img[0:overlaySize, 0:overlaySize] = overlayList[0]
            elif firstOpen and secondOpen and thirdOpen and fourthOpen:
                # All four fingers open.
                img[0:overlaySize, 0:overlaySize] = overlayList[2]
        cv2.imshow("image", img)
        # 27 is the Esc key code.
        if cv2.waitKey(2) & 0xFF == 27:
            break
finally:
    # Always free the camera and close the preview window, even on error.
    cap.release()
    cv2.destroyAllWindows()

Related Links

https://gist.github.com/TheJLifeX/74958cc59db477a91837244ff598ef4a

Guess you like

Origin blog.csdn.net/qq_43550173/article/details/116273477