OpenCV practical project: gesture recognition - hand distance measurement

Gesture Recognition Series Article Directory

Gesture recognition is a human-computer interaction technology that enables the operation and control of computers, smartphones, smart TVs and other devices by recognizing human gestures.

1. OpenCV implements hand tracking (locating key points of the hand)

2. OpenCV practical project: gesture tracking that returns position information (encapsulated call)

3. Gesture recognition: gesture volume control (OpenCV)

4. OpenCV practical project: gesture recognition - gesture-controlled mouse

5. OpenCV practical project: gesture recognition - hand distance measurement

to be continued

This project uses Google's open-source framework mediapipe, which provides many ready-made models for us to use, such as face detection, body detection, hand detection, etc.


The code uses OpenCV, the HandTrackingModule module, and the mediapipe module.

1. HandTrackingModule module

This time we continue to add new content to the HandTrackingModule module. If you already know it, you can just copy and paste it and skip ahead.

import cv2
import mediapipe as mp
import math

Define the HandDetector class:

class HandDetector:
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        # initialization parameters
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        # initialize Mediapipe's hand detection module and drawing utilities
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode, max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon, min_tracking_confidence=self.minTrackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]  # landmark ids of the five fingertips (thumb to pinky)
        self.fingers = []
        self.lmList = []

Define the findHands method for detecting hands in an image:

def findHands(self, img, draw=True, flipType=True):
    # convert the image from BGR to RGB
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # run Mediapipe hand detection
    self.results = self.hands.process(imgRGB)
    allHands = []
    h, w, c = img.shape
    if self.results.multi_hand_landmarks:
        for handType, handLms in zip(self.results.multi_handedness, self.results.multi_hand_landmarks):
            myHand = {}
            # extract each landmark's pixel coordinates and store them in mylmList
            mylmList = []
            xList = []
            yList = []
            for id, lm in enumerate(handLms.landmark):
                px, py = int(lm.x * w), int(lm.y * h)
                mylmList.append([px, py])
                xList.append(px)
                yList.append(py)
            
            # compute bounding box info
            xmin, xmax = min(xList), max(xList)
            ymin, ymax = min(yList), max(yList)
            boxW, boxH = xmax - xmin, ymax - ymin
            bbox = xmin, ymin, boxW, boxH
            cx, cy = bbox[0] + (bbox[2] // 2), bbox[1] + (bbox[3] // 2)

            myHand["lmList"] = mylmList
            myHand["bbox"] = bbox
            myHand["center"] = (cx, cy)

            if flipType:
                if handType.classification[0].label == "Right":
                    myHand["type"] = "Left"
                else:
                    myHand["type"] = "Right"
            else:
                myHand["type"] = handType.classification[0].label
            allHands.append(myHand)

            # draw the hand landmarks, bounding box and label on the image
            if draw:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
                cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                              (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                              (255, 0, 255), 2)
                cv2.putText(img, myHand["type"], (bbox[0] - 30, bbox[1] - 30), cv2.FONT_HERSHEY_PLAIN,
                            2, (255, 0, 255), 2)
    if draw:
        return allHands, img  # return the detected hands and the annotated image
    else:
        return allHands  # return only the detected hands, without drawing
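
Each element of allHands is a dictionary. For a single detected hand it looks roughly like this (the values below are made up purely for illustration):

hands, img = detector.findHands(img)
# hands[0] might contain:
# {
#     "lmList": [[421, 388], [367, 362], ...],  # 21 [x, y] pixel points
#     "bbox": (340, 220, 180, 210),             # x, y, w, h
#     "center": (430, 325),                     # cx, cy
#     "type": "Right"
# }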

Define the fingersUp method to detect how many fingers are up:

def fingersUp(self, myHand):
    # get the hand info
    myHandType = myHand["type"]
    myLmList = myHand["lmList"]
    if self.results.multi_hand_landmarks:
        fingers = []
        # thumb: compare the x-coordinates of the tip and the joint below it
        if myHandType == "Right":
            if myLmList[self.tipIds[0]][0] > myLmList[self.tipIds[0] - 1][0]:
                fingers.append(1)
            else:
                fingers.append(0)
        else:
            if myLmList[self.tipIds[0]][0] < myLmList[self.tipIds[0] - 1][0]:
                fingers.append(1)
            else:
                fingers.append(0)

        # other four fingers: a tip higher than the joint two points below means the finger is up
        for id in range(1, 5):
            if myLmList[self.tipIds[id]][1] < myLmList[self.tipIds[id] - 2][1]:
                fingers.append(1)
            else:
                fingers.append(0)
    return fingers
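
A quick usage sketch (hypothetical, assuming findHands has already returned at least one hand):

hands, img = detector.findHands(img)
if hands:
    fingers = detector.fingersUp(hands[0])
    print(fingers)  # e.g. [1, 1, 0, 0, 0] means only the thumb and index finger are up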

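The main function below also calls a findDistance method that the walkthrough above does not define. Here is a minimal sketch in the cvzone style: it returns the pixel length, an info tuple with the endpoints and midpoint, and, if an image is passed in, the annotated image (treat it as an assumption about the original module, not verbatim source):

def findDistance(self, p1, p2, img=None):
    # p1 and p2 are [x, y] pixel points, e.g. lmList1[8] and lmList2[8]
    x1, y1 = p1
    x2, y2 = p2
    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2   # midpoint
    length = math.hypot(x2 - x1, y2 - y1)     # Euclidean distance in pixels
    info = (x1, y1, x2, y2, cx, cy)
    if img is not None:
        # draw the endpoints, the connecting line and the midpoint
        cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
        cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
        cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
        cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
        return length, info, img
    return length, info
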
Finally, a main function uses the HandDetector class to detect hands and draw the detections on the image:

def main():
    cap = cv2.VideoCapture(0)
    detector = HandDetector(detectionCon=0.8, maxHands=2)
    while True:
        # grab a frame from the camera
        success, img = cap.read()
        # detect hands and get the hand info plus the annotated image
        hands, img = detector.findHands(img)
        
        if hands:
            # process the detected hand info here: key points, bounding box, hand type, ...
            pass  # placeholder so the otherwise-empty block is valid Python

        # show the image
        cv2.imshow("Image", img)
        cv2.waitKey(1)

if __name__ == "__main__":
    main()

In this loop, the program captures frames from the camera, then uses the HandDetector class to detect hands and draw the results. You can add code to read the hand information and process it as needed.

Full code:

"""
Hand Tracking Module
By: Computer Vision Zone
Website: https://www.computervision.zone/
"""

import cv2
import mediapipe as mp
import math


class HandDetector:
    """
    Finds Hands using the mediapipe library. Exports the landmarks
    in pixel format. Adds extra functionalities like finding how
    many fingers are up or the distance between two fingers. Also
    provides bounding box info of the hand found.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """
        :param mode: In static mode, detection is done on each image: slower
        :param maxHands: Maximum number of hands to detect
        :param detectionCon: Minimum Detection Confidence Threshold
        :param minTrackCon: Minimum Tracking Confidence Threshold
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode, max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon, min_tracking_confidence=self.minTrackCon)
        self.mpDraw = mp.solutions.drawing_utils
        self.tipIds = [4, 8, 12, 16, 20]
        self.fingers = []
        self.lmList = []

    def findHands(self, img, draw=True, flipType=True):
        """
        Finds hands in a BGR image.
        :param img: Image to find the hands in.
        :param draw: Flag to draw the output on the image.
        :return: Image with or without drawings
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        allHands = []
        h, w, c = img.shape
        if self.results.multi_hand_landmarks:
            for handType, handLms in zip(self.results.multi_handedness, self.results.multi_hand_landmarks):
                myHand = {}
                ## lmList
                mylmList = []
                xList = []
                yList = []
                for id, lm in enumerate(handLms.landmark):
                    px, py = int(lm.x * w), int(lm.y * h)
                    mylmList.append([px, py])
                    xList.append(px)
                    yList.append(py)

                ## bbox
                xmin, xmax = min(xList), max(xList)
                ymin, ymax = min(yList), max(yList)
                boxW, boxH = xmax - xmin, ymax - ymin
                bbox = xmin, ymin, boxW, boxH
                cx, cy = bbox[0] + (bbox[2] // 2), \
                         bbox[1] + (bbox[3] // 2)

                myHand["lmList"] = mylmList
                myHand["bbox"] = bbox
                myHand["center"] =  (cx, cy)

                if flipType:
                    if handType.classification[0].label =="Right":
                        myHand["type"] = "Left"
                    else:
                        myHand["type"] = "Right"
                else:myHand["type"] = handType.classification[0].label
                allHands.append(myHand)

                ## draw
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
                    cv2.rectangle(img, (bbox[0] - 20, bbox[1] - 20),
                                  (bbox[0] + bbox[2] + 20, bbox[1] + bbox[3] + 20),
                                  (255, 0, 255), 2)
                    cv2.putText(img, myHand["type"], (bbox[0] - 30, bbox[1] - 30), cv2.FONT_HERSHEY_PLAIN,
                                2, (255, 0, 255), 2)
        if draw:
            return allHands, img
        else:
            return allHands


    def fingersUp(self, myHand):
        """
        Finds how many fingers are open and returns in a list.
        Considers left and right hands separately
        :return: List of which fingers are up
        """
        myHandType = myHand["type"]
        myLmList = myHand["lmList"]
        if self.results.multi_hand_landmarks:
            fingers = []
            # Thumb
            if myHandType == "Right":
                if myLmList[self.tipIds[0]][0] > myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)
            else:
                if myLmList[self.tipIds[0]][0] < myLmList[self.tipIds[0] - 1][0]:
                    fingers.append(1)
                else:
                    fingers.append(0)

            # 4 Fingers
            for id in range(1, 5):
                if myLmList[self.tipIds[id]][1] < myLmList[self.tipIds[id] - 2][1]:
                    fingers.append(1)
                else:
                    fingers.append(0)
        return fingers
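
    # NOTE: main() below calls findDistance, which the original listing omitted.
    # The cvzone-style sketch below is an assumption added so the module runs as-is.
    def findDistance(self, p1, p2, img=None):
        """
        Find the pixel distance between two landmarks and optionally draw it.
        :param p1: first point [x, y]
        :param p2: second point [x, y]
        :param img: image to draw on (optional)
        :return: length, info tuple (x1, y1, x2, y2, cx, cy), and img if given
        """
        x1, y1 = p1
        x2, y2 = p2
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        length = math.hypot(x2 - x1, y2 - y1)
        info = (x1, y1, x2, y2, cx, cy)
        if img is not None:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
            return length, info, img
        return length, info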




def main():
    cap = cv2.VideoCapture(0)
    detector = HandDetector(detectionCon=0.8, maxHands=2)
    while True:
        # Get image frame
        success, img = cap.read()
        # Find the hand and its landmarks
        hands, img = detector.findHands(img)  # with draw
        # hands = detector.findHands(img, draw=False)  # without draw

        if hands:
            # Hand 1
            hand1 = hands[0]
            lmList1 = hand1["lmList"]  # List of 21 Landmark points
            bbox1 = hand1["bbox"]  # Bounding box info x,y,w,h
            centerPoint1 = hand1['center']  # center of the hand cx,cy
            handType1 = hand1["type"]  # Handtype Left or Right

            fingers1 = detector.fingersUp(hand1)

            if len(hands) == 2:
                # Hand 2
                hand2 = hands[1]
                lmList2 = hand2["lmList"]  # List of 21 Landmark points
                bbox2 = hand2["bbox"]  # Bounding box info x,y,w,h
                centerPoint2 = hand2['center']  # center of the hand cx,cy
                handType2 = hand2["type"]  # Hand Type "Left" or "Right"

                fingers2 = detector.fingersUp(hand2)

                # Find Distance between two Landmarks. Could be same hand or different hands
                length, info, img = detector.findDistance(lmList1[8], lmList2[8], img)  # with draw
                # length, info = detector.findDistance(lmList1[8], lmList2[8])  # without draw
        # Display
        cv2.imshow("Image", img)
        cv2.waitKey(1)


if __name__ == "__main__":
    main()

---------------------------------------- Dividing line ----------------------------------------

For this hand detection module, we have optimized the position detection and integrated it into the hand detector.

2. Main module

The idea: take the pixel coordinates of the two key points 5 and 17 (the base joints of the index finger and the pinky), then compute the Euclidean distance between them, d = sqrt((x2 - x1)^2 + (y2 - y1)^2). The real-world span between these two points is roughly fixed for a given hand, so the pixel distance shrinks predictably as the hand moves away from the camera. By recording the pixel distance at a series of known hand-to-camera distances, we can fit a function that maps pixel distance to centimeters. (Of course, palm sizes differ from person to person, so this is only an approximate distance; the error is about 3%.)

Next is the main module code

Import the necessary libraries and modules:

import cv2
from HandTrackingModule import HandDetector
import math
import numpy as np
import cvzone

Set camera parameters and hand detector:

cap = cv2.VideoCapture(0)
cap.set(3, 1280)  # set the camera frame width
cap.set(4, 720)   # set the camera frame height
detector = HandDetector(detectionCon=0.8, maxHands=1)  # create a HandDetector instance
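
The numeric ids 3 and 4 are OpenCV's standard capture properties; the equivalent named constants are:

cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)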

Fit a function that maps the raw hand distance in pixels to a value in centimeters:

# Find Function
x = [300, 245, 200, 170, 145, 130, 112, 103, 93, 87, 80, 75, 70, 67, 62, 59, 57]
y = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]
coff = np.polyfit(x, y, 2)  # fit a quadratic to the data, giving coefficients A, B, C
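
As a quick sanity check, evaluating the fitted polynomial at one of the sample points should give back roughly the paired centimeter value (np.polyval computes y = Ax^2 + Bx + C):

print(np.polyval(coff, 130))  # a raw distance of 130 px should map to roughly 45 cm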

 Enter the main loop to process real-time camera images:

while True:
    success, img = cap.read()
    hands = detector.findHands(img, draw=False)  # detect hands in the image without drawing

    if hands:
        # get the hand info
        lmList = hands[0]['lmList']  # list of key points
        x, y, w, h = hands[0]['bbox']  # bounding box position and size
        x1, y1 = lmList[5]  # base joint of the index finger (landmark 5)
        x2, y2 = lmList[17]  # base joint of the pinky (landmark 17)

        # Euclidean distance between the two points, in pixels
        distance = int(math.sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2))
        
        # map the pixel distance to a centimeter value
        A, B, C = coff
        distanceCM = A * distance ** 2 + B * distance + C

        # draw the bounding box and distance on the image
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 3)
        cvzone.putTextRect(img, f'{int(distanceCM)} cm', (x+5, y-10))

    cv2.imshow("Image", img)
    cv2.waitKey(1)  # refresh the window every 1 ms (this alone does not exit the loop)
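
Note that cv2.waitKey(1) only keeps the window refreshing; it does not provide a way out of the loop. A common pattern, if you want one, is to break on a key press (a small addition, not in the original code):

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break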

This code block captures frames from the camera in real time, uses the HandDetector class to detect the hand, computes the pixel distance between the two key points, maps that distance to centimeters, and draws the bounding box and distance on the image. Finally, cv2.imshow displays the result in a window, and cv2.waitKey keeps the window refreshing so the program keeps running.

Full code:

import cv2
from HandTrackingModule import HandDetector
import math
import numpy as np
import cvzone

# Webcam
cap = cv2.VideoCapture(0)
cap.set(3, 1280)
cap.set(4, 720)

# Hand Detector
detector = HandDetector(detectionCon=0.8, maxHands=1)

# Find Function
# x is the raw distance y is the value in cm
x = [300, 245, 200, 170, 145, 130, 112, 103, 93, 87, 80, 75, 70, 67, 62, 59, 57]
y = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]
coff = np.polyfit(x, y, 2)  # y = Ax^2 + Bx + C

# Loop
while True:
    success, img = cap.read()
    hands = detector.findHands(img, draw=False)

    if hands:
        lmList = hands[0]['lmList']
        x, y, w, h = hands[0]['bbox']
        x1, y1 = lmList[5]
        x2, y2 = lmList[17]

        distance = int(math.sqrt((y2 - y1) ** 2 + (x2 - x1) ** 2))
        A, B, C = coff
        distanceCM = A * distance ** 2 + B * distance + C

        # print(distanceCM, distance)

        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 3)
        cvzone.putTextRect(img, f'{int(distanceCM)} cm', (x+5, y-10))

    cv2.imshow("Image", img)
    cv2.waitKey(1)

If you run into any problems, please leave a message in the comments section.

Origin blog.csdn.net/weixin_45303602/article/details/132261856