Machine learning: English sign language classification with KNN

Let's take a look at standard English sign language first:
[Figure: chart of the standard English sign language alphabet]

Here is part of the recognition effect of the KNN model in this article (only a few letters are selected):
[Figures: real-time recognition results for a few of the letters]

The main steps completed are:

  1. Process an image dataset from Kaggle with mediapipe to obtain the positions of the 21 hand landmarks
  2. Build a KNN model
  3. Evaluate the model

Build a dataset

The image dataset comes from a dataset on Kaggle (dataset link).
After the download is complete, we get a large number of 128*128 images. Without any deep learning, we can still use mediapipe to extract the landmark information (the mediapipe-related module is introduced in another article, and is also attached at the end of this one).
The processing flow is:

  1. Traverse the image dataset and use mediapipe to extract the gesture information, i.e. the x and y coordinates of the 21 landmarks (to improve accuracy, the detection confidence can be adjusted, as introduced in the other article).
  2. The dataset provider encodes the meaning of each gesture in the letter of the file/directory name, so we save that letter together with the landmarks as the target value.
import pandas as pd
import HandTrackingModule as htm
import cv2
import os
import time
import numpy as np
import csv


detector = htm.handDetctor(mode=True, detectionCon=0.6, trackCon=0.6)
csv_col_name = ['0_x', '0_y', '1_x', '1_y', '2_x', '2_y', '3_x', '3_y', '4_x', '4_y', '5_x', '5_y',
                '6_x', '6_y', '7_x', '7_y', '8_x', '8_y', '9_x', '9_y', '10_x', '10_y', '11_x', '11_y',
                '12_x', '12_y', '13_x', '13_y', '14_x', '14_y', '15_x', '15_y', '16_x', '16_y', '17_x', '17_y',
                '18_x', '18_y', '19_x', '19_y', '20_x', '20_y','target']


def load_image():

    path = "dataset5"
    dirs = os.listdir(path)
    for file_ABCD in dirs:
        for file_abcd in os.listdir(path+"/"+file_ABCD):
            for img_path in os.listdir(path+"/"+file_ABCD+"/"+file_abcd):
                # skip the depth images in the dataset
                if "depth" in img_path:
                    continue
                # print(path+"/"+file_ABCD+"/"+file_abcd+"/"+img_path)
                img = cv2.imread(path+"/"+file_ABCD+"/"+file_abcd+"/"+img_path)
                # run mediapipe hand detection on the image
                img = detector.findHands(img)
                lmList = detector.findPosition(img, draw=False)
                if len(lmList) == 42:
                    # append the gesture letter (the directory name) as the target value
                    lmList.append(file_abcd)
                    # write the landmark coordinates to the csv file
                    write_to_csv(lmList)
                cv2.imshow("show", img)
                cv2.waitKey(1)


def write_to_csv(lmList):
    # write the header row once so that pandas can later read the 'target' column by name
    write_header = not os.path.exists('testcsv.csv')
    with open(r'testcsv.csv', mode='a', newline='', encoding='utf8') as cfa:
        csv_write = csv.writer(cfa)
        if write_header:
            csv_write.writerow(csv_col_name)
        csv_write.writerow(lmList)
    return None


if __name__ == '__main__':
    load_image()

We save the data to the testcsv.csv file: one row per image, with the 42 coordinate columns followed by the target letter.

In fact, the amount of data is a little small.
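
As a quick sanity check (a minimal sketch that only assumes the testcsv.csv file produced by the script above), we can look at how many samples were extracted for each letter:

import pandas as pd

# load the landmark csv written by the extraction script
data = pd.read_csv("testcsv.csv")
print(data.shape)                     # (number of samples, 43)
print(data["target"].value_counts())  # number of samples per letter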

Build and evaluate the model

Modeling:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
import joblib


def load_data():
    data = pd.read_csv("testcsv.csv")
    # split the dataset (the first 42 columns are the landmark coordinates)
    x_train, x_test, y_train, y_test = train_test_split(data.iloc[:, 0:42], data.target, random_state=10)

    # standardization
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    # the test set must be transformed with the same scaler as the training set (important!)
    x_test = transfer.transform(x_test)

    # KNN estimator (the initial n_neighbors is overridden by the grid search below)
    estimator = KNeighborsClassifier(n_neighbors=10)
    # add grid search with cross-validation over K
    param_dict = {"n_neighbors": [11, 13, 15, 17, 19, 21]}
    estimator = GridSearchCV(estimator, param_grid=param_dict, cv=10)

    estimator.fit(x_train, y_train)

    # model evaluation
    # 1. directly compare the predicted values with the true values
    y_predict = estimator.predict(x_test)
    print(y_predict == y_test)

    # 2. compute the accuracy on the test set
    score = estimator.score(x_test, y_test)
    print(score)

    # save the model and the scaler (the scaler is needed again for real-time detection)
    joblib.dump(estimator, "k_near.pkl")
    joblib.dump(transfer, "transfer.pkl")


if __name__ == '__main__':
    load_data()
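
Since GridSearchCV is used, the best K it found can also be inspected. A small optional addition (assuming the fitted estimator variable inside load_data above):

# optional: print the grid search results, placed inside load_data after fitting
print("best parameters:", estimator.best_params_)
print("best cross-validation accuracy:", estimator.best_score_)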

Real-time detection:


import HandTrackingModule as htm
import cv2
import numpy as np
import joblib

wCam, hCam = 640, 480
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
cap.set(3, wCam)
cap.set(4, hCam)

detector = htm.handDetctor(detectionCon=0.6, trackCon=0.6)
# load the trained KNN model and the scaler fitted on the training set
model = joblib.load("k_near.pkl")
transfer = joblib.load("transfer.pkl")

while True:
    success, img = cap.read()

    img = detector.findHands(img)
    lmList = detector.findPosition(img, draw=False)
    if len(lmList) == 42:
        # apply the same standardization as during training, then predict
        lm = transfer.transform(np.array(lmList).reshape(1, -1))
        m_predict = model.predict(lm)
        cv2.putText(img, str(m_predict), (10, 70), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 255), 3)
    cv2.imshow("image", img)
    # press Esc to exit
    if cv2.waitKey(2) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()

Points for optimization

  1. The KNN parameters and the range of the grid search can be further optimized (a sketch of a wider search appears below).
  2. KNN itself is only suited to small-data scenarios and is sensitive to the value of K, so the overall gesture recognition effect is only average.
  3. The data preprocessing here is extremely rough; operations such as data cleaning could still be added.

I have looked into these points, but am too lazy to implement them.
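
For point 1, here is a minimal sketch of a wider grid search. It assumes the same x_train / y_train as in the modeling code above; the extra weights and p options are standard KNeighborsClassifier parameters, not something used in the original script:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# search over K, the neighbor weighting scheme and the Minkowski power parameter
param_dict = {
    "n_neighbors": [3, 5, 7, 9, 11, 13, 15],
    "weights": ["uniform", "distance"],
    "p": [1, 2],  # 1 = Manhattan distance, 2 = Euclidean distance
}
estimator = GridSearchCV(KNeighborsClassifier(), param_grid=param_dict, cv=10)
estimator.fit(x_train, y_train)  # x_train / y_train from the modeling code above
print(estimator.best_params_, estimator.best_score_)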

Attachment

HandTrackingModule.py

import cv2
import mediapipe as mp
import time
import math


class handDetctor():
    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, trackCon=0.5):
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.trackCon = trackCon

        self.mpHands = mp.solutions.hands
        # pass the options by keyword: the positional order differs between mediapipe versions
        self.hands = self.mpHands.Hands(static_image_mode=self.mode,
                                        max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils

    def findHands(self, img, draw=True, ):
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # convert BGR to RGB for mediapipe
        self.results = self.hands.process(imgRGB)

        # print(results.multi_hand_landmarks)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)

        return img

    def findPosition(self, img, handNo=0, draw=True):
        lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                # print(id, lm)
                # get the finger joint (landmark) coordinates
                h, w, c = img.shape
                # cx, cy = int(lm.x*w), int(lm.y*h)
                lmList.append(lm.x)
                lmList.append(lm.y)
                # if draw:
                #     cv2.putText(img, str(int(id)), (cx+10, cy+10), cv2.FONT_HERSHEY_PLAIN,
                #                 1, (0, 0, 255), 2)

        return lmList

    def fingerStatus(self, lmList):
        # return a list with the open/closed status of each finger
        # note: this expects lmList entries of the form [id, x, y] (a pixel-coordinate
        # version of findPosition), not the flat x/y list returned above
        fingerList = []
        id, originx, originy = lmList[0]
        keypoint_list = [[2, 4], [6, 8], [10, 12], [14, 16], [18, 20]]
        for point in keypoint_list:
            id, x1, y1 = lmList[point[0]]
            id, x2, y2 = lmList[point[1]]
            if math.hypot(x2-originx, y2-originy) > math.hypot(x1-originx, y1-originy):
                fingerList.append(True)
            else:
                fingerList.append(False)

        return fingerList

Origin blog.csdn.net/qq_43550173/article/details/116886019