Gesture Recognition Series Article Directory
Gesture recognition is a human-computer interaction technology that realizes the operation and control of computers, smart phones, smart TVs and other devices by recognizing human gestures.
1. Opencv implements hand tracking (locating key points of the hand)
3. Gesture recognition - gesture volume control (opencv)
4. Opencv practical project gesture recognition - gesture control mouse
5. Opencv practical project gesture recognition - hand distance measurement
to be continued
This project uses Google's open source framework mediapipe , which has a lot of models for us to use, such as face detection, body detection, hand detection, etc.
The code requires the opencv and mediapipe packages together with the HandTrackingModule module.
1. HandTrackingModule module
This time we continue to add new content to the HandTrackingModule module. If you are already familiar with it, you can skip the explanation and simply copy and paste the code.
import cv2
import mediapipe as mp
import math
Define the HandDetector class:
class HandDetector:
    """Hand detector built on top of the MediaPipe Hands solution."""

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """
        Store the configuration and create the MediaPipe helper objects.

        :param mode: Forwarded to MediaPipe as ``static_image_mode``
        :param maxHands: Forwarded to MediaPipe as ``max_num_hands``
        :param detectionCon: Forwarded as ``min_detection_confidence``
        :param minTrackCon: Forwarded as ``min_tracking_confidence``
        """
        # Per-detector state; both lists start out empty.
        self.lmList = []
        self.fingers = []
        # Landmark indices treated as fingertips by fingersUp, thumb first.
        self.tipIds = [4, 8, 12, 16, 20]

        # Keep the configuration so it can be inspected later.
        self.minTrackCon = minTrackCon
        self.detectionCon = detectionCon
        self.maxHands = maxHands
        self.mode = mode

        # MediaPipe hand solution, the detector instance and the drawing helper.
        self.mpDraw = mp.solutions.drawing_utils
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(
            static_image_mode=mode,
            max_num_hands=maxHands,
            min_detection_confidence=detectionCon,
            min_tracking_confidence=minTrackCon,
        )
Define findHands, the method that detects hands in an image:
def findHands(self, img, draw=True, flipType=True):
    """
    Detect hands in a BGR image and describe each one as a dict.

    :param img: BGR image to search; it is drawn on when draw is True
    :param draw: Draw the landmarks, bounding box and label on img
    :param flipType: Swap the "Left"/"Right" label reported by MediaPipe
    :return: (hands, img) when draw is True, otherwise only hands; every
             hand is a dict with the keys "lmList", "bbox", "center", "type"
    """
    # Convert the BGR frame to RGB, then run the MediaPipe detector on it.
    rgbFrame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    self.results = self.hands.process(rgbFrame)
    foundHands = []
    height, width, _ = img.shape

    landmarkSets = self.results.multi_hand_landmarks
    if landmarkSets:
        for handedness, landmarks in zip(self.results.multi_handedness, landmarkSets):
            # Scale each landmark's (x, y) by the image size to get pixels.
            points = [[int(lm.x * width), int(lm.y * height)]
                      for lm in landmarks.landmark]
            xs = [point[0] for point in points]
            ys = [point[1] for point in points]

            # Bounding box as (x, y, width, height) plus its center point.
            left, top = min(xs), min(ys)
            boxW, boxH = max(xs) - left, max(ys) - top
            bbox = left, top, boxW, boxH
            center = (left + (boxW // 2), top + (boxH // 2))

            label = handedness.classification[0].label
            if flipType:
                label = "Left" if label == "Right" else "Right"

            hand = {
                "lmList": points,
                "bbox": bbox,
                "center": center,
                "type": label,
            }
            foundHands.append(hand)

            if draw:
                # Skeleton, a box padded by 20 px, and the hand label.
                self.mpDraw.draw_landmarks(img, landmarks, self.mpHands.HAND_CONNECTIONS)
                topLeft = (left - 20, top - 20)
                bottomRight = (left + boxW + 20, top + boxH + 20)
                cv2.rectangle(img, topLeft, bottomRight, (255, 0, 255), 2)
                cv2.putText(img, label, (left - 30, top - 30),
                            cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)

    # The return shape depends on draw: with drawing the image is returned too.
    return (foundHands, img) if draw else foundHands
Define fingersUp, the method that detects which fingers are open:
def fingersUp(self, myHand):
    """
    Report which fingers of a detected hand are raised.

    :param myHand: Hand dict as produced by findHands; its "type" and
                   "lmList" entries are read
    :return: List of five 0/1 flags, thumb first and then the fingers for
             tipIds[1:5]; an empty list when the most recent findHands
             call found no hands or findHands has not been called yet
    """
    handType = myHand["type"]
    points = myHand["lmList"]

    # Always bind the result so the "no hands" path returns [] instead of
    # raising UnboundLocalError.
    fingers = []
    results = getattr(self, "results", None)
    if results is None or not results.multi_hand_landmarks:
        return fingers

    # Thumb: compare the x of the tip with the x of the landmark before it;
    # the direction of the comparison depends on the hand type.
    thumbTip = self.tipIds[0]
    tipX = points[thumbTip][0]
    jointX = points[thumbTip - 1][0]
    if handType == "Right":
        fingers.append(1 if tipX > jointX else 0)
    else:
        fingers.append(1 if tipX < jointX else 0)

    # Other four fingers: raised when the tip's y is smaller than the y of
    # the landmark two indices before it.
    for tip in self.tipIds[1:5]:
        fingers.append(1 if points[tip][1] < points[tip - 2][1] else 0)

    return fingers
Finally, the main function uses the HandDetector class to detect hands and draw the detections on the image:
def main():
    """
    Run a webcam demo: detect hands in every frame and show the result.

    The loop ends when a frame cannot be read or when "q" is pressed in
    the preview window; the camera and the window are released afterwards.
    """
    cap = cv2.VideoCapture(0)
    detector = HandDetector(detectionCon=0.8, maxHands=2)
    try:
        while True:
            # Grab a frame; stop instead of passing an unusable image on.
            success, img = cap.read()
            if not success:
                break

            # Detect the hands; with drawing enabled the image is returned too.
            hands, img = detector.findHands(img)
            if hands:
                # Process the detected hand information here, e.g. the
                # landmarks, the bounding box or the hand type.
                pass

            # Show the frame whether or not a hand was found.
            cv2.imshow("Image", img)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
In this loop, the program captures image frames from the camera, then uses the HandDetector class to detect hands and draw the detection results. You can add code to read the hand information and process it as needed.
Complete code
"""
Hand Tracking Module
By: Computer Vision Zone
Website: https://www.computervision.zone/
"""
import cv2
import mediapipe as mp
import math
class HandDetector:
    """
    Finds Hands using the mediapipe library. Exports the landmarks
    in pixel format. Adds extra functionalities like finding how
    many fingers are up or the distance between two fingers. Also
    provides bounding box info of the hand found.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """
        :param mode: In static mode, detection is done on each image: slower
        :param maxHands: Maximum number of hands to detect
        :param detectionCon: Minimum Detection Confidence Threshold
        :param minTrackCon: Minimum Tracking Confidence Threshold
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.minTrackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark indices treated as fingertips by fingersUp, thumb first.
        self.tipIds = [4, 8, 12, 16, 20]
        self.fingers = []
        self.lmList = []

    def findHands(self, img, draw=True, flipType=True):
        """
        Finds hands in a BGR image.

        :param img: Image to find the hands in.
        :param draw: Flag to draw the output on the image.
        :param flipType: Flag to swap the "Left"/"Right" label reported by
                         mediapipe.
        :return: (allHands, img) when draw is True, otherwise only allHands.
                 Each hand is a dict with the keys "lmList" (pixel
                 landmarks), "bbox" (x, y, w, h), "center" (cx, cy) and
                 "type" ("Left" or "Right").
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        allHands = []
        h, w = img.shape[:2]
        if self.results.multi_hand_landmarks:
            for handType, handLms in zip(self.results.multi_handedness,
                                         self.results.multi_hand_landmarks):
                myHand = {}
                # lmList: landmark coordinates scaled to pixels
                mylmList = [[int(lm.x * w), int(lm.y * h)] for lm in handLms.landmark]
                xList = [point[0] for point in mylmList]
                yList = [point[1] for point in mylmList]

                # bbox: (x, y, width, height) and the center of that box
                xmin, xmax = min(xList), max(xList)
                ymin, ymax = min(yList), max(yList)
                boxW, boxH = xmax - xmin, ymax - ymin
                bbox = xmin, ymin, boxW, boxH
                cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)

                myHand["lmList"] = mylmList
                myHand["bbox"] = bbox
                myHand["center"] = (cx, cy)

                label = handType.classification[0].label
                if flipType:
                    myHand["type"] = "Left" if label == "Right" else "Right"
                else:
                    myHand["type"] = label
                allHands.append(myHand)

                # draw: skeleton, a box padded by 20 px, and the hand label
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
                    cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                                  (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                                  (255, 0, 255), 2)
                    cv2.putText(img, myHand["type"], (bbox[0] - 30, bbox[1] - 30),
                                cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 255), 2)
        if draw:
            return allHands, img
        return allHands

    def fingersUp(self, myHand):
        """
        Finds how many fingers are open and returns in a list.
        Considers left and right hands separately

        :param myHand: Hand dict as returned by findHands
        :return: List of which fingers are up (five 0/1 flags, thumb first);
                 an empty list when the most recent findHands call found no
                 hands or findHands has not been called yet
        """
        myHandType = myHand["type"]
        myLmList = myHand["lmList"]

        # Always bind the result so the "no hands" path returns [] instead
        # of raising UnboundLocalError.
        fingers = []
        results = getattr(self, "results", None)
        if results is None or not results.multi_hand_landmarks:
            return fingers

        # Thumb: compare the x of the tip with the x of the landmark before
        # it; the direction of the comparison depends on the hand type.
        thumbTip = self.tipIds[0]
        tipX = myLmList[thumbTip][0]
        jointX = myLmList[thumbTip - 1][0]
        if myHandType == "Right":
            fingers.append(1 if tipX > jointX else 0)
        else:
            fingers.append(1 if tipX < jointX else 0)

        # 4 Fingers: up when the tip's y is smaller than the y of the
        # landmark two indices before it.
        for tip in self.tipIds[1:5]:
            fingers.append(1 if myLmList[tip][1] < myLmList[tip - 2][1] else 0)
        return fingers

    def findDistance(self, p1, p2, img=None):
        """
        Find the distance between two points, e.g. two entries of a lmList.

        :param p1: First point as (x, y)
        :param p2: Second point as (x, y)
        :param img: Optional image to draw the two points, the connecting
                    line and the midpoint on
        :return: (length, info) or, when img is given, (length, info, img);
                 info is the tuple (x1, y1, x2, y2, cx, cy) where (cx, cy)
                 is the integer midpoint
        """
        x1, y1 = p1
        x2, y2 = p2
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        length = math.hypot(x2 - x1, y2 - y1)
        info = (x1, y1, x2, y2, cx, cy)
        if img is None:
            return length, info

        cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
        cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
        cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
        cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
        return length, info, img
def main():
    """
    Webcam demo for HandDetector.

    Every frame is searched for up to two hands. For each hand the
    landmark list, bounding box, center, hand type and raised fingers are
    read; when two hands are visible, the distance between their index
    fingertips is measured and drawn. The loop ends when a frame cannot be
    read or when "q" is pressed; the camera and window are then released.
    """
    cap = cv2.VideoCapture(0)
    detector = HandDetector(detectionCon=0.8, maxHands=2)
    try:
        while True:
            # Get image frame; stop instead of passing an unusable image on
            success, img = cap.read()
            if not success:
                break

            # Find the hand and its landmarks
            hands, img = detector.findHands(img)  # with draw
            # Without draw only the list is returned:
            #   hands = detector.findHands(img, draw=False)

            if hands:
                # Hand 1
                hand1 = hands[0]
                lmList1 = hand1["lmList"]  # List of 21 Landmark points
                bbox1 = hand1["bbox"]  # Bounding box info x,y,w,h
                centerPoint1 = hand1['center']  # center of the hand cx,cy
                handType1 = hand1["type"]  # Handtype Left or Right
                fingers1 = detector.fingersUp(hand1)

                if len(hands) == 2:
                    # Hand 2
                    hand2 = hands[1]
                    lmList2 = hand2["lmList"]  # List of 21 Landmark points
                    bbox2 = hand2["bbox"]  # Bounding box info x,y,w,h
                    centerPoint2 = hand2['center']  # center of the hand cx,cy
                    handType2 = hand2["type"]  # Hand Type "Left" or "Right"
                    fingers2 = detector.fingersUp(hand2)

                    # Distance between landmark 8 (index fingertip) of the
                    # two hands. This needs lmList2, so it must stay inside
                    # the two-hand branch.
                    x1, y1 = lmList1[8]
                    x2, y2 = lmList2[8]
                    length = math.hypot(x2 - x1, y2 - y1)
                    cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)

            # Display
            cv2.imshow("Image", img)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
----------------------------------------Dividing line-------- ---------------------
In this version of the hand detection module, the position detection has been optimized and integrated directly into the hand detection.
2. Main module
The idea is: take the positions of the two key points 5 and 17 and calculate the Euclidean distance between them in pixels. As shown in the figure, this pixel distance changes as the hand moves closer to or farther from the camera. We measure the pixel distance at a series of known real distances, and then fit a function that maps one to the other. (Of course, palm sizes differ between people, so this is only an approximate distance; the error is about 3%.)
Next is the main module code
Import the necessary libraries and modules:
import cv2
from HandTrackingModule import HandDetector
import math
import numpy as np
import cvzone
Set camera parameters and hand detector:
# Open camera 0 and ask it for 1280x720 frames.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)   # camera width (property id 3)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)   # camera height (property id 4)

# One HandDetector instance, limited to a single hand.
detector = HandDetector(maxHands=1, detectionCon=0.8)
Define a function for mapping hand distances to centimeter values:
# Calibration samples for the mapping function:
# x holds the raw distances, y the matching values in cm.
x = [
    300, 245, 200, 170, 145, 130, 112, 103, 93,
    87, 80, 75, 70, 67, 62, 59, 57,
]
y = [
    20, 25, 30, 35, 40, 45, 50, 55, 60,
    65, 70, 75, 80, 85, 90, 95, 100,
]
# Fit a second-degree polynomial; coff holds the coefficients A, B, C of
# y = A*x^2 + B*x + C.
coff = np.polyfit(x, y, deg=2)
Enter the main loop to process real-time camera images:
# Main loop: measure the hand in every frame until "q" is pressed or the
# camera stops delivering frames.
while True:
    success, img = cap.read()
    if not success:
        # cap.read() reported failure, so img is not usable.
        break

    hands = detector.findHands(img, draw=False)  # detect only, no drawing
    if hands:
        # Hand information of the first detected hand
        lmList = hands[0]['lmList']  # list of landmark points
        # Separate names for the box so the calibration lists x and y
        # are not overwritten.
        bx, by, bw, bh = hands[0]['bbox']
        x1, y1 = lmList[5]   # landmark 5: base joint of the index finger
        x2, y2 = lmList[17]  # landmark 17: base joint of the little finger

        # Euclidean distance between the two points, in whole pixels
        distance = int(math.hypot(x2 - x1, y2 - y1))

        # Map the pixel distance to centimeters: A*d^2 + B*d + C
        distanceCM = np.polyval(coff, distance)

        # Draw the bounding box and the distance on the image
        cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (255, 0, 255), 3)
        cvzone.putTextRect(img, f'{int(distanceCM)} cm', (bx + 5, by - 10))

    cv2.imshow("Image", img)
    # Press "q" in the preview window to leave the loop.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Free the camera and close the preview window.
cap.release()
cv2.destroyAllWindows()
The main purpose of this code block is to capture images from the camera in real time, use the HandDetector class to detect the hand, calculate the distance between two key points, map that distance to a value in centimeters, and then draw the bounding box and the distance information on the image. Finally, cv2.imshow displays the result in a window, and cv2.waitKey waits for and processes keyboard input, so that the program can continue to run.
Complete code
import cv2
from HandTrackingModule import HandDetector
import math
import numpy as np
import cvzone
# Webcam: open camera 0 and ask it for 1280x720 frames
cap = cv2.VideoCapture(0)
cap.set(3, 1280)
cap.set(4, 720)

# Hand Detector, limited to a single hand
detector = HandDetector(detectionCon=0.8, maxHands=1)

# Find Function
# x is the raw distance y is the value in cm
x = [300, 245, 200, 170, 145, 130, 112, 103, 93, 87, 80, 75, 70, 67, 62, 59, 57]
y = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]
coff = np.polyfit(x, y, 2)  # y = Ax^2 + Bx + C

# Loop: runs until "q" is pressed or the camera stops delivering frames
while True:
    success, img = cap.read()
    if not success:
        # cap.read() reported failure, so img is not usable.
        break

    hands = detector.findHands(img, draw=False)
    if hands:
        lmList = hands[0]['lmList']
        # Separate names for the box so the calibration lists x and y
        # are not overwritten.
        bx, by, bw, bh = hands[0]['bbox']
        x1, y1 = lmList[5]   # landmark 5: base joint of the index finger
        x2, y2 = lmList[17]  # landmark 17: base joint of the little finger

        # Pixel distance between the two landmarks, mapped to centimeters
        distance = int(math.hypot(x2 - x1, y2 - y1))
        distanceCM = np.polyval(coff, distance)

        cv2.rectangle(img, (bx, by), (bx + bw, by + bh), (255, 0, 255), 3)
        cvzone.putTextRect(img, f'{int(distanceCM)} cm', (bx + 5, by - 10))

    cv2.imshow("Image", img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Free the camera and close the preview window.
cap.release()
cv2.destroyAllWindows()
If you have any problems, please leave a message in the comment area