Gesture recognition with MediaPipe (rock, paper, scissors)
Environment: Python 3.8, PyCharm 2020
Hardware: Logitech C505e webcam
In the previous article we extracted the hand keypoint coordinates; with a little extra processing of those coordinates we can recognize gestures. (See the end of the article for the complete code.)
First, a quick look at the final effect:
Wrapper class (skippable)
For convenience, we first wrap the keypoint-extraction code from the previous article into a class:
import cv2
import mediapipe as mp
import time
import math


class handDetector():
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # keyword arguments, since newer mediapipe versions changed the positional order
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert BGR to RGB
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                # convert normalized landmark coordinates to pixel coordinates
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                lmList.append([id, cx, cy])
                if draw:
                    cv2.putText(img, str(id), (cx + 10, cy + 10), cv2.FONT_HERSHEY_PLAIN,
                                1, (0, 0, 255), 2)
        return lmList


# Usage
def main():
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    # FPS counter
    pTime = 0
    cTime = 0
    detector = handDetector()
    while True:
        success, img = cap.read()
        img = detector.findHands(img)
        lmList = detector.findPosition(img, draw=False)
        if len(lmList) != 0:
            print(lmList)

        # compute and display the frame rate
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

        cv2.imshow("image", img)
        if cv2.waitKey(2) & 0xFF == 27:  # exit on Esc
            break
    cap.release()


if __name__ == '__main__':
    main()
Gesture judgment
With MediaPipe we can obtain the pixel coordinates of the hand keypoints; to recognize a gesture we only need to decide whether each finger is open or closed. The landmark indices follow MediaPipe's hand model: 0 is the wrist, 4/8/12/16/20 are the fingertips (thumb to pinky), and 2/6/10/14/18 are the corresponding lower joints.
Take the index finger as an example. When the index finger is extended, the distance from landmark 8 (the fingertip) to landmark 0 (the wrist) is clearly greater than the distance from landmark 6 (the PIP joint) to landmark 0. When the index finger is curled, the opposite holds.
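As a minimal sketch, the test for the index finger alone can be written as follows (assuming lmList is the list returned by findPosition above, with entries of the form [id, x, y]):

import math

# lmList comes from detector.findPosition(img); each entry is [id, x, y]
_, x0, y0 = lmList[0]  # landmark 0: wrist
_, x6, y6 = lmList[6]  # landmark 6: index finger PIP joint
_, x8, y8 = lmList[8]  # landmark 8: index fingertip
# the finger counts as open when its tip is farther from the wrist than its PIP joint
indexOpen = math.hypot(x8 - x0, y8 - y0) > math.hypot(x6 - x0, y6 - y0)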
Applying the same test to every finger gives the method below:
def fingerStatus(self, lmList):
    fingerList = []
    id, originx, originy = lmList[0]  # landmark 0: wrist, used as the origin
    keypoint_list = [[2, 4], [6, 8], [10, 12], [14, 16], [18, 20]]  # [lower joint, fingertip] per finger
    for point in keypoint_list:
        id, x1, y1 = lmList[point[0]]
        id, x2, y2 = lmList[point[1]]
        # a finger is open when its tip is farther from the wrist than its lower joint
        if math.hypot(x2 - originx, y2 - originy) > math.hypot(x1 - originx, y1 - originy):
            fingerList.append(True)
        else:
            fingerList.append(False)
    return fingerList
Usage: thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = detector.fingerStatus(lmList). Note that the coordinates of all 21 landmarks must be obtained first (via findPosition).
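If you want a single gesture label instead of five flags, one option is a lookup table keyed on the finger states. A minimal sketch (the GESTURES table and classify helper are illustrative, not part of the module; the thumb is ignored, as in the recognition script below):

# maps the (index, middle, ring, pinky) open states to a gesture label
GESTURES = {
    (False, False, False, False): "rock",
    (True, True, False, False): "scissors",
    (True, True, True, True): "paper",
}

def classify(fingerList):
    # fingerList is the five-element result of fingerStatus(); drop the thumb
    return GESTURES.get(tuple(fingerList[1:]), "unknown")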
Full code
HandTrackingModule.py
import cv2
import mediapipe as mp
import time
import math


class handDetector():
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # keyword arguments, since newer mediapipe versions changed the positional order
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert BGR to RGB
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0, draw=True):
        lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                # convert normalized landmark coordinates to pixel coordinates
                h, w, c = img.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                lmList.append([id, cx, cy])
                if draw:
                    cv2.putText(img, str(id), (cx + 10, cy + 10), cv2.FONT_HERSHEY_PLAIN,
                                1, (0, 0, 255), 2)
        return lmList

    # returns a list with the open/closed status of each finger (thumb to pinky)
    def fingerStatus(self, lmList):
        fingerList = []
        id, originx, originy = lmList[0]  # landmark 0: wrist, used as the origin
        keypoint_list = [[2, 4], [6, 8], [10, 12], [14, 16], [18, 20]]  # [lower joint, fingertip]
        for point in keypoint_list:
            id, x1, y1 = lmList[point[0]]
            id, x2, y2 = lmList[point[1]]
            # a finger is open when its tip is farther from the wrist than its lower joint
            if math.hypot(x2 - originx, y2 - originy) > math.hypot(x1 - originx, y1 - originy):
                fingerList.append(True)
            else:
                fingerList.append(False)
        return fingerList


def main():
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    # FPS counter
    pTime = 0
    cTime = 0
    detector = handDetector()
    while True:
        success, img = cap.read()
        img = detector.findHands(img)
        lmList = detector.findPosition(img, draw=False)
        if len(lmList) != 0:
            # print(lmList)
            print(detector.fingerStatus(lmList))

        # compute and display the frame rate
        cTime = time.time()
        fps = 1 / (cTime - pTime)
        pTime = cTime
        cv2.putText(img, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)

        cv2.imshow("image", img)
        if cv2.waitKey(2) & 0xFF == 27:  # exit on Esc
            break
    cap.release()


if __name__ == '__main__':
    main()
gestureRecognition.py
import cv2
import os

import HandTrackingModule as htm

wCam, hCam = 640, 480

cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
cap.set(3, wCam)  # frame width
cap.set(4, hCam)  # frame height

# preload the gesture images
picture_path = "gesture_picture"
myList = os.listdir(picture_path)
print(myList)
overlayList = []
for imPath in myList:
    image = cv2.imread(f'{picture_path}/{imPath}')
    overlayList.append(image)

detector = htm.handDetector(detectionCon=0.7)

while True:
    success, img = cap.read()
    img = detector.findHands(img)
    lmList = detector.findPosition(img, draw=False)
    if len(lmList) != 0:
        thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = detector.fingerStatus(lmList)
        # all four fingers closed: rock
        if not firstOpen and not secondOpen and not thirdOpen and not fourthOpen:
            img[0:200, 0:200] = overlayList[1]
        # index and middle fingers open: scissors
        if firstOpen and secondOpen and not thirdOpen and not fourthOpen:
            img[0:200, 0:200] = overlayList[0]
        # all four fingers open: paper
        if firstOpen and secondOpen and thirdOpen and fourthOpen:
            img[0:200, 0:200] = overlayList[2]

    cv2.imshow("image", img)
    if cv2.waitKey(2) & 0xFF == 27:  # exit on Esc
        break
cap.release()
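Two caveats about the overlay: the assignment img[0:200, 0:200] = overlayList[i] only works if every file in gesture_picture is exactly 200x200 pixels, and os.listdir returns files in an arbitrary order, so which image lands at which index depends on your file names. A defensive version of the loading loop, as a sketch:

overlayList = []
for imPath in sorted(myList):  # sort so the index order is deterministic
    image = cv2.imread(f'{picture_path}/{imPath}')
    image = cv2.resize(image, (200, 200))  # force the size the overlay slice expects
    overlayList.append(image)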
Related Links
https://gist.github.com/TheJLifeX/74958cc59db477a91837244ff598ef4a