Python Opencv practice - gesture volume control

This article builds a gesture-based volume control on top of the hand tracking from the previous article. The code reuses the HandDetector class written for that hand-tracking article, which can be found here:

Python Opencv Practice-Hand Tracking-CSDN blog article has been viewed and read 626 times, liked 11 times, and collected 7 times. Use the mediapipe library for real-time tracking of hands. For an introduction to mediapipe, please visit Baidu. https://blog.csdn.net/vivo01/article/details/135071340?spm=1001.2014.3001.5502

pycaw is used for volume control. To install pycaw, just run pip install pycaw.

The code is shown below:

import cv2 as cv
import math
import mediapipe as mp
import time
from ctypes import cast,POINTER
from comtypes import CLSCTX_ALL
# Use pycaw to control the volume: pip install pycaw
from pycaw.pycaw import AudioUtilities,IAudioEndpointVolume

class HandDetector():
    """Wrapper around the MediaPipe Hands solution for per-frame hand detection.

    Typical use: call findHands() on each frame, then findHandPositions()
    to read the pixel coordinates of one detected hand's landmarks.
    """

    def __init__(self, mode=False,
                 maxNumHands=2,
                 modelComplexity=1,
                 minDetectionConfidence=0.5,
                 minTrackingConfidence=0.5):
        """Store the detector settings and create the MediaPipe objects.

        The arguments are forwarded to mediapipe's ``Hands`` constructor.
        """
        self.mode = mode
        self.maxNumHands = maxNumHands
        self.modelComplexity = modelComplexity
        self.minDetectionConfidence = minDetectionConfidence
        self.minTrackingConfidence = minTrackingConfidence
        # Result of the most recent findHands() call. Initialised here so that
        # findHandPositions() is safe to call before any frame was processed.
        self.results = None
        # Create the mediapipe solutions.hands detector. Keyword arguments keep
        # every setting bound to the intended parameter regardless of the
        # positional order of the installed mediapipe version.
        self.mpHands = mp.solutions.hands
        self.handsDetector = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxNumHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.minDetectionConfidence,
            min_tracking_confidence=self.minTrackingConfidence)
        # mediapipe drawing helper used to render the hand skeleton.
        self.mpDrawUtils = mp.solutions.drawing_utils

    def findHands(self, img, drawOnImage=True):
        """Run hand detection on a BGR frame and optionally draw the result.

        Args:
            img: frame in OpenCV's default BGR channel order.
            drawOnImage: when true, draw landmarks and connections of every
                detected hand onto ``img``.

        Returns:
            The same ``img`` object (drawn on in place when requested).
        """
        # The mediapipe detector expects RGB input while OpenCV delivers BGR.
        imgRGB = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        self.results = self.handsDetector.process(imgRGB)

        # multi_hand_landmarks is only truthy when at least one hand was found.
        if drawOnImage and self.results.multi_hand_landmarks:
            for handLandmarks in self.results.multi_hand_landmarks:
                self.mpDrawUtils.draw_landmarks(img, handLandmarks, self.mpHands.HAND_CONNECTIONS)
        return img

    def findHandPositions(self, img, handID=0, drawOnImage=True):
        """Return the landmark list of one hand from the last detection.

        Args:
            img: the frame the detection was run on; its shape converts the
                landmark ratios to pixel coordinates and it is drawn on when
                ``drawOnImage`` is true.
            handID: index of the hand inside the detection result.
            drawOnImage: when true, draw a circle on every landmark.

        Returns:
            A list of ``[landmarkIndex, x, y]`` entries in pixel coordinates.
            The list is empty when no detection has been run yet, no hand was
            found, or ``handID`` does not refer to a detected hand.
        """
        results = self.results
        hands = results.multi_hand_landmarks if results is not None else None
        if not hands:
            return []
        try:
            handLandmarks = hands[handID]
        except IndexError:
            # Fewer hands were detected than the caller asked for.
            return []

        # Only height and width are needed; slicing also accepts 2-D
        # (single-channel) frames. Hoisted out of the loop: it is invariant.
        h, w = img.shape[:2]
        landmarkList = []
        for landmarkIndex, landmark in enumerate(handLandmarks.landmark):
            # landmark.x / landmark.y are ratios of the frame size; scale them
            # to pixel coordinates of the frame.
            centerX, centerY = int(landmark.x * w), int(landmark.y * h)
            landmarkList.append([landmarkIndex, centerX, centerY])
            if drawOnImage:
                # Mark the landmark with a circle.
                cv.circle(img, (centerX, centerY), 8, (0, 255, 0))
        return landmarkList

def DisplayFPS(img, preTime):
    """Draw the current frame rate onto ``img``.

    Args:
        img: frame to draw the "FPS:<n>" label on.
        preTime: timestamp returned by the previous call (seconds).

    Returns:
        The timestamp taken during this call, to be passed in as ``preTime``
        next time.
    """
    now = time.time()
    elapsed = now - preTime
    # Skip drawing when no time has passed, to avoid dividing by zero.
    if elapsed != 0:
        label = "FPS:" + str(int(1 / elapsed))
        cv.putText(img, label, (10,70), cv.FONT_HERSHEY_PLAIN,
                   3, (0,255,0), 3)
    return now

def AudioEndpointGet():
    """Obtain the volume interface of the speaker device via pycaw.

    Returns:
        A ``(volume, volumeRange)`` tuple: the IAudioEndpointVolume interface
        pointer and the value returned by its ``GetVolumeRange()`` call.
    """
    speakers = AudioUtilities.GetSpeakers()
    endpoint = cast(
        speakers.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None),
        POINTER(IAudioEndpointVolume))
    return endpoint, endpoint.GetVolumeRange()

def AudioVolumeLevelSet(volume, range, value):
    """Set the master volume level, clamped to the endpoint's valid range.

    Args:
        volume: endpoint volume interface exposing ``SetMasterVolumeLevel``;
            nothing happens when it is falsy.
        range: sequence whose first two items are the minimum and maximum
            accepted level. (The name shadows the builtin but is kept because
            it is part of the function's signature.)
        value: requested level, in the same unit as ``range``.
    """
    if not volume:
        return
    lowest, highest = range[0], range[1]
    # Saturate instead of silently ignoring the request: a value slightly
    # outside the range should pin the volume at the nearest limit rather
    # than leave it at whatever level was set before.
    clamped = max(lowest, min(highest, value))
    volume.SetMasterVolumeLevel(clamped, None)

def main(videoSource='../../SampleVideos/handVolumeControl.mp4',
         pinchDistance=40,
         maxDistance=425):
    """Control the system volume with the thumb/index fingertip distance.

    Reads frames from ``videoSource``, tracks the first detected hand and
    maps the pixel distance between the thumb tip (landmark 4) and the index
    fingertip (landmark 8) to a master volume level. Press 'q' to quit.

    Args:
        videoSource: anything accepted by ``cv.VideoCapture`` (file path or
            camera index). Defaults to the sample video.
        pinchDistance: fingertip distance (pixels) below which the volume is
            set to its minimum.
        maxDistance: fingertip distance (pixels) mapped to the loudest level.
            The defaults come from the sample video, where the measured
            distance stays roughly between 30 and 425; a live camera would
            need its own calibration.
    """
    video = cv.VideoCapture(videoSource)
    try:
        if not video.isOpened():
            print("Cannot open video source:", videoSource)
            return

        # Timestamp of the previous frame, used for the FPS overlay.
        preTime = 0
        handDetector = HandDetector(minDetectionConfidence=0.7)
        volume, volumeRange = AudioEndpointGet()
        print(volumeRange)
        # Track the smallest/largest fingertip distance seen, as calibration
        # information printed at the end.
        minFingerDistance = 1000
        maxFingerDistance = 0

        while True:
            ret, frame = video.read()
            if not ret:
                break
            frame = handDetector.findHands(frame)
            hand0Landmarks = handDetector.findHandPositions(frame)
            if hand0Landmarks:
                # Pixel coordinates of the thumb tip (4) and index fingertip (8).
                thumbX, thumbY = hand0Landmarks[4][1], hand0Landmarks[4][2]
                indexFingerX, indexFingerY = hand0Landmarks[8][1], hand0Landmarks[8][2]
                # Midpoint between the two fingertips (integer pixels for drawing).
                midPoint = (int((thumbX + indexFingerX) / 2),
                            int((thumbY + indexFingerY) / 2))
                # Highlight both fingertips with filled circles.
                cv.circle(frame, (thumbX, thumbY), 18, (90, 220, 180), cv.FILLED)
                cv.circle(frame, (indexFingerX, indexFingerY), 18, (0, 120, 255), cv.FILLED)
                # Line connecting the two fingertips.
                cv.line(frame, (thumbX, thumbY), (indexFingerX, indexFingerY), (255, 60, 60), 3)

                distance = math.hypot(thumbX - indexFingerX, thumbY - indexFingerY)
                minFingerDistance = min(minFingerDistance, distance)
                maxFingerDistance = max(maxFingerDistance, distance)

                if distance < pinchDistance:
                    # Fingers pinched: green midpoint, volume at its minimum.
                    cv.circle(frame, midPoint, 18, (0, 255, 0), cv.FILLED)
                    AudioVolumeLevelSet(volume, volumeRange, volumeRange[0])
                else:
                    cv.circle(frame, midPoint, 18, (0, 0, 255), cv.FILLED)
                    # Saturate at maxDistance so that a hand opened wider than
                    # the calibrated maximum maps to the loudest level instead
                    # of producing an out-of-range value.
                    ratio = min(distance / maxDistance, 1.0)
                    # Linear mapping from volumeRange[0] (ratio 0) up to 0
                    # (ratio 1); on the author's machine the range is -63.5
                    # to 0 with a step of 0.5.
                    value = volumeRange[0] * (1 - ratio)
                    print(value)
                    AudioVolumeLevelSet(volume, volumeRange, value)

            preTime = DisplayFPS(frame, preTime)
            cv.imshow('Real Time Hand Detection', frame)
            if cv.waitKey(30) & 0xFF == ord('q'):
                break
        print("Min & Max distance between thumb and index finger tips: ", minFingerDistance, maxFingerDistance)
    finally:
        # Always free the capture and close the window, even when detection
        # or the audio endpoint raises.
        video.release()
        cv.destroyAllWindows()

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

You can see the effect in my Bilibili video:

Python Opencv hand training - gesture volume control_bilibili_bilibili implements a gesture volume control function based on mediapipe hand detection. Source code reference my CSDN: https://blog.csdn.net/vivo01/article/details/135118979?spm =1001.2014.3001.5502, video views 1, comments 0, likes 0, coins tossed 0, favorites 0, retweets 0, video author vivo119, author profile is a coder who likes Coyote, hobby Game development, related videos: Xiaoguai likes to eat noodles the most, daily conflicts between Xiaoguai (white) and sesame (black), this fat dog wants to jump on the sofa, but is a little fatter, Python Opencv - mediapipe does hand tracking and recognition , why do puppies feel embarrassed when looking at the camera? The dog who suddenly loves tomatoes, the rotating Mi Nuo dog, which has two modes of manual rotation and automatic rotation. Take a good class, the first experience of sugar-free lotus root starch for puppies, Mi Nuo dog Bathing notes, the whole process is a good dog https://www.bilibili.com/video/BV1Ej411H79q/?vd_source=474bff49614e62744eb84e9f8340d91a

Python Opencv practice - gesture volume control

Guess you like