Development environment: Thonny, python
Hardware requirements: esp32cam
Implementation ideas:
Transfer the pictures taken by esp32cam to the computer through the network protocol for processing
Lower computer code:
import camera
import socket
import network
import camera
import time
SSID='XXX'#输入wifi名字
password='XXX'#输入WIFI密码
IP='192.168.1.34'#换成自己的IPv4地址
port=9090
#连接wifi
wlan = network.WLAN(network.STA_IF)
wlan.active(True)
if not wlan.isconnected():
print('connecting to network...')
wlan.connect(SSID,password)
while not wlan.isconnected():
pass
print('网络配置:', wlan.ifconfig())
camera.init(0,format=3)
s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM,0)
i=0
while 1:
i=i+1
buf = camera.capture() # 获取图像数据
s.sendto(buf,(IP,port)) # 向服务器发送图像数据
time.sleep(0.15)
print(i,buf)
The principle is very simple. The way of transmission is UDP. At the beginning, I thought about using serial port or TCP, but the serial port only has UART0, so I can’t use it. Compared with UDP, TCP is faster than UDP, so UDP is used.
PC code:
import socket
import cv2
import io
from PIL import Image
import numpy as np
import mediapipe as mp
import math
IP="192.168.1.34" #换成自己的IPv4地址
port=9090
def vector_2d_angle(v1,v2):
'''
求解二维向量的角度
'''
v1_x=v1[0]
v1_y=v1[1]
v2_x=v2[0]
v2_y=v2[1]
try:
angle_= math.degrees(math.acos((v1_x*v2_x+v1_y*v2_y)/(((v1_x**2+v1_y**2)**0.5)*((v2_x**2+v2_y**2)**0.5))))
except:
angle_ =65535.
if angle_ > 180.:
angle_ = 65535.
return angle_
def hand_angle(hand_):
'''
获取对应手相关向量的二维角度,根据角度确定手势
'''
angle_list = []
#---------------------------- thumb 大拇指角度
angle_ = vector_2d_angle(
((int(hand_[0][0])- int(hand_[2][0])),(int(hand_[0][1])-int(hand_[2][1]))),
((int(hand_[3][0])- int(hand_[4][0])),(int(hand_[3][1])- int(hand_[4][1])))
)
angle_list.append(angle_)
#---------------------------- index 食指角度
angle_ = vector_2d_angle(
((int(hand_[0][0])-int(hand_[6][0])),(int(hand_[0][1])- int(hand_[6][1]))),
((int(hand_[7][0])- int(hand_[8][0])),(int(hand_[7][1])- int(hand_[8][1])))
)
angle_list.append(angle_)
#---------------------------- middle 中指角度
angle_ = vector_2d_angle(
((int(hand_[0][0])- int(hand_[10][0])),(int(hand_[0][1])- int(hand_[10][1]))),
((int(hand_[11][0])- int(hand_[12][0])),(int(hand_[11][1])- int(hand_[12][1])))
)
angle_list.append(angle_)
#---------------------------- ring 无名指角度
angle_ = vector_2d_angle(
((int(hand_[0][0])- int(hand_[14][0])),(int(hand_[0][1])- int(hand_[14][1]))),
((int(hand_[15][0])- int(hand_[16][0])),(int(hand_[15][1])- int(hand_[16][1])))
)
angle_list.append(angle_)
#---------------------------- pink 小拇指角度
angle_ = vector_2d_angle(
((int(hand_[0][0])- int(hand_[18][0])),(int(hand_[0][1])- int(hand_[18][1]))),
((int(hand_[19][0])- int(hand_[20][0])),(int(hand_[19][1])- int(hand_[20][1])))
)
angle_list.append(angle_)
return angle_list
def h_gesture(angle_list):
thr_angle = 65.
thr_angle_thumb = 53.
thr_angle_s = 49.
gesture_str = None
if 65535. not in angle_list:
if (angle_list[0]>thr_angle_thumb) and (angle_list[1]<thr_angle_s) and (angle_list[2]>thr_angle_s) and (angle_list[3]>thr_angle) and (angle_list[4]>thr_angle):
gesture_str = "one"
elif (angle_list[0]>thr_angle_thumb) and (angle_list[1]<thr_angle_s) and (angle_list[2]<thr_angle_s) and (angle_list[3]>thr_angle) and (angle_list[4]>thr_angle):
gesture_str = "two"
elif (angle_list[0]>thr_angle_thumb) and (angle_list[1]<thr_angle_s) and (angle_list[2]<thr_angle_s) and (angle_list[3]<thr_angle) and (angle_list[4]>thr_angle):
gesture_str = "three"
elif (angle_list[0]>thr_angle_thumb) and (angle_list[1]<thr_angle_s) and (angle_list[2]<thr_angle_s) and (angle_list[3]<thr_angle) and (angle_list[4]<thr_angle):
gesture_str = "four"
elif (angle_list[0] < thr_angle_s) and (angle_list[1] < thr_angle_s) and (angle_list[2] < thr_angle_s) and (angle_list[3] < thr_angle_s) and (angle_list[4] < thr_angle_s):
gesture_str = "five"
return gesture_str
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
static_image_mode=False,
max_num_hands=1,
min_detection_confidence=0.5,
min_tracking_confidence=0.5)
print("开始")
s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM,0)
s.bind((IP,port))
a=0
while True:
data,IP = s.recvfrom(100000)
a=a+1
print(a,IP)
bytes_stream = io.BytesIO(data)
image = Image.open(bytes_stream)
img = np.asarray(image)
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
# BiCubic_small = cv2.resize(img, (int(img.shape[1] * 0.5), int(img.shape[0] * 0.5)), interpolation=cv2.INTER_CUBIC)
# img = cv2.resize(BiCubic_small, (600, 400), interpolation=cv2.INTER_CUBIC)
results = hands.process(img)
if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)
hand_local = []
for i in range(21):
x = hand_landmarks.landmark[i].x * img.shape[1]
y = hand_landmarks.landmark[i].y * img.shape[0]
hand_local.append((x, y))
if hand_local:
angle_list = hand_angle(hand_local)
gesture_str = h_gesture(angle_list)
cv2.putText(img, gesture_str, (0, 100), 0, 1.3, (0, 0, 255), 3)
cv2.imshow('MediaPipe Hands', img)
if cv2.waitKey(1)==ord("q"):
break
The effect of the finished product is as follows. The picture sending rate and loss rate are both acceptable. After all, a module of more than 30 pieces has been done to detect the key points of the hand and the joints of the human body. With this successful precedent, the next step is to do Item classification.
Oh!!!! Another important point is that it will be much better to connect an antenna