1. Conceptos básicos
El personal en el partido de fútbol son: 11 personas en el equipo A, 11 personas en el equipo B, y árbitros, entre los cuales no estudiaremos por el momento al portero.
El personal del estadio debe clasificarse y presentarse en el Kanban 2D.
1.1 Identificar el objetivo:
1) Equipo blanco en el terreno de juego
2) El equipo azul en el campo
3) Árbitros en el campo
1.2 Ideas de implementación
Opción 1: Red de aprendizaje profundo autoconstruida con ReLU (introducción principal)
1) Use keras para entrenar la red profunda para formar el archivo modelo H5
2) archivo H5 a archivo onnx
Solución 2: Usar YOLOv7 para entrenar la red
Aplicaciones de ingeniería
2. Formación de la red Keras
Depende del paquete de python
autopep8==1.5.4
certifi==2020.6.20
chardet==3.0.4
cycler==0.10.0
idna==2.10
kiwisolver==1.2.0
matplotlib==3.3.1
mercurial==5.5
numpy==1.19.1
opencv-python==4.5.1.48
packaging==20.4
pandas==1.1.1
Pillow==7.2.0
pycodestyle==2.6.0
pyparsing==2.4.7
PyQt5==5.14.2
PyQt5-sip==12.11.0
pyqtgraph==0.11.0
python-dateutil==2.8.1
pytz==2020.1
requests==2.24.0
requests-cache==0.5.2
sip==5.4.0
six==1.15.0
toml==0.10.2
urllib3==1.25.11
depende del entorno
Python 3.8, tensorflow 2.0 (incluido keras)
2.1 Modelo de red de Python
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np
from keras.datasets import mnist
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
import glob
import cv2
from google.colab.patches import cv2_imshow
import base64
from IPython.display import clear_output, Image
# Verify that a GPU runtime is available; Colab notebooks must be switched
# to a GPU runtime for training to run at a reasonable speed.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    print(
        '\n\nThis error most likely means that this notebook is not '
        'configured to use a GPU. Change this in Notebook Settings via the '
        'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
    raise SystemError('GPU device not found')
tf.__version__, keras.__version__  # notebook cell: display installed versions
# LABELS
# One-hot class indices used throughout: blue team = 0, white team = 1, referee = 2.
BLUE, WHITE, REF = 0, 1, 2
def reading_files(path, label):
    """Load every image matching *path* and pair each with a one-hot label.

    Parameters
    ----------
    path : str
        Glob pattern of image files to read (e.g. "./blue/*.jpg").
    label : int
        Class index (0=BLUE, 1=WHITE, 2=REF) set to 1 in the one-hot vector.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        Float32 image stack and the matching (n, 3) one-hot label array.
    """
    files = glob.glob(path)
    data, labels = [], []
    for file in files:
        image = cv2.imread(file)
        # BUG FIX: cv2.imread returns None for unreadable/corrupt files;
        # the original appended None, which would poison np.array() below.
        if image is None:
            continue
        data.append(image)
        one_hot = np.zeros((3,))
        one_hot[label] = 1
        labels.append(one_hot)
    return np.array(data, dtype=np.float32), np.array(labels, dtype=np.float32)
def load_data(shuffle=True):
    """Assemble the full dataset from the blue/white/referee image folders.

    Each newly loaded class block is stacked *on top of* what was gathered
    so far (original behaviour), so without shuffling the row order is
    referee, white, blue. With shuffle=True, X rows and Y rows are permuted
    together so they stay aligned.
    """
    def _stack(acc, block):
        # First class starts the pile; later classes are prepended to it.
        if acc is None:
            return block
        return np.vstack([block, acc])

    sources = (("./blue/*.jpg", BLUE),
               ("./white/*.jpg", WHITE),
               ("./referee/*.jpg", REF))
    X = Y = None
    for pattern, cls in sources:
        imgs, one_hots = reading_files(pattern, cls)
        X = _stack(X, imgs)
        Y = _stack(Y, one_hots)
    if shuffle:
        shape_x, shape_y = X.shape, Y.shape
        n_features = np.prod(np.array([*X.shape[1:]]))
        # Glue flattened images and labels side by side so one shuffle
        # permutes both consistently, then split them apart again.
        combined = np.hstack([X.reshape(X.shape[0], -1), Y])
        np.random.shuffle(combined)
        X = combined[:, :n_features].reshape(shape_x)
        Y = combined[:, n_features:]
    return X, Y
def des_label(label):
    """Map a one-hot (or score) vector to its human-readable class name."""
    names = ("BLUE", "WHITE", "REFEREE")
    return names[int(np.argmax(label))]
# Load the full dataset and sanity-check one sample.
X, Y = load_data()
X.shape, Y.shape
# showing one image
print(des_label(Y[0]))
cv2_imshow(X[0])
# train test split: 10% held-out test set, then 15% of the rest as validation.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1,
                                                    shuffle=True,
                                                    random_state=41)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train,
                                                  test_size=0.15,
                                                  shuffle=True,
                                                  random_state=41)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape, X_val.shape, Y_val.shape
# preprocess: scale pixels to [0, 1] with min/max statistics taken from the
# TRAINING set only, then apply that same transform to the test set.
scaler = MinMaxScaler()
main_shape_X_train = X_train.shape
main_shape_X_test = X_test.shape
scaler_train = scaler.fit(X_train.reshape(X_train.shape[0], -1))
X_train = scaler.transform(X_train.reshape(X_train.shape[0], -1)).reshape(main_shape_X_train)
# BUG FIX: the original called fit_transform on the test set, re-fitting the
# scaler on test statistics — data leakage and an inconsistent transform
# between train and test. Use the train-fitted transform instead.
X_test = scaler.transform(X_test.reshape(X_test.shape[0], -1)).reshape(main_shape_X_test)
np.max(X_train[0].ravel()), np.min(X_train[0].ravel()), X_train.shape
def naive_inception_module(layer_in, f1=2, f2=2, f3=2):
    """Naive Inception block: parallel 1x1 / 3x3 / 5x5 convolutions plus a
    same-size 3x3 max-pool, concatenated on the channel axis.

    Assumes channels-last tensors. f1/f2/f3 are the filter counts of the
    three convolution branches.
    """
    branches = [
        keras.layers.Conv2D(f1, (1, 1), padding='same', activation='relu')(layer_in),
        keras.layers.Conv2D(f2, (3, 3), padding='same', activation='relu')(layer_in),
        keras.layers.Conv2D(f3, (5, 5), padding='same', activation='relu')(layer_in),
        keras.layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(layer_in),
    ]
    return keras.layers.concatenate(branches, axis=-1)
# Functional-API model: Conv stem -> two naive Inception modules -> max-pool,
# flatten, two L1-regularized Dense+Dropout stages -> 3-way softmax.
# NOTE(review): this model is built and summarized here, but the `model`
# variable is reassigned by make_model() further below, so this graph appears
# to be for inspection/comparison only.
input_layer = keras.layers.Input([*X_train.shape[1:]])
second_layer = keras.layers.Conv2D(20, 5, padding="same")(input_layer)
th_layer = keras.layers.Activation("relu")(second_layer)
inception = naive_inception_module(th_layer)
inception = naive_inception_module(inception)
fo_layer = keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(inception)
fi_layer = keras.layers.Flatten()(fo_layer)
# Dense head: He initialization pairs with the ReLU activations that follow.
x_layer = keras.layers.Dense(250,
                             kernel_initializer=keras.initializers.HeNormal(),
                             kernel_regularizer=keras.regularizers.L1())(fi_layer)
s_layer = keras.layers.Activation(keras.activations.relu)(x_layer)
e_layer = keras.layers.Dropout(rate=0.5)(s_layer)
n_layer = keras.layers.Dense(100,
                             kernel_initializer=keras.initializers.HeNormal(),
                             kernel_regularizer=keras.regularizers.L1())(e_layer)
t_layer = keras.layers.Activation(keras.activations.relu)(n_layer)
ee_layer = keras.layers.Dropout(rate=0.5)(t_layer)
out_layer = keras.layers.Dense(3, activation="softmax")(ee_layer)
model = keras.models.Model(inputs=input_layer, outputs=out_layer)
model.summary()
# model lenet 5 or vgg-16
def make_model(input_shape, output_dim):
    """Build a small LeNet/VGG-style sequential CNN.

    Three Conv+ReLU+MaxPool stages, then two L1-regularized Dense+Dropout
    layers and a softmax output.

    Parameters
    ----------
    input_shape : sequence of int
        Image shape (H, W, C) fed to the network.
    output_dim : int
        Number of output classes.

    Returns
    -------
    keras.models.Sequential
        The (uncompiled) model; its summary is printed as a side effect.
    """
    layers = []
    layers.append(keras.layers.Input(input_shape))
    # FIX: the original also passed input_shape= to this Conv2D; that is
    # redundant (and potentially conflicting) once an explicit Input layer
    # is present, so it was dropped.
    layers.append(keras.layers.Conv2D(20, 5, padding="same"))
    layers.append(keras.layers.Activation("relu"))
    layers.append(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    layers.append(keras.layers.Conv2D(20, 5, padding="same"))
    layers.append(keras.layers.Activation("relu"))
    layers.append(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    layers.append(keras.layers.Conv2D(10, 5, padding="same"))
    layers.append(keras.layers.Activation("relu"))
    layers.append(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    layers.append(keras.layers.Flatten())
    # Dense head: He initialization pairs with ReLU; L1 encourages sparsity.
    layers.append(keras.layers.Dense(80,
                                     kernel_initializer=keras.initializers.HeNormal(),
                                     kernel_regularizer=keras.regularizers.L1()))
    layers.append(keras.layers.Activation(keras.activations.relu))
    layers.append(keras.layers.Dropout(rate=0.5))
    layers.append(keras.layers.Dense(50,
                                     kernel_initializer=keras.initializers.HeNormal(),
                                     kernel_regularizer=keras.regularizers.L1()))
    layers.append(keras.layers.Activation(keras.activations.relu))
    layers.append(keras.layers.Dropout(rate=0.5))
    layers.append(keras.layers.Dense(output_dim, activation="softmax"))
    model = keras.models.Sequential(layers)
    model.summary()
    return model
model = make_model(input_shape=[*X_train.shape[1:]], output_dim=3)
# compiling: one-hot labels + softmax output -> categorical cross-entropy.
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.categorical_crossentropy,
              metrics=["accuracy"])
datagen = ImageDataGenerator(  # data augmentation
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest')
batch_size = 128
# Train on augmented batches; validation uses the raw (already scaled) split.
history = model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size),
                    validation_data=(X_val, Y_val),
                    steps_per_epoch=len(Y_train) // batch_size, epochs=50, workers=6)
# Plot the training curves: loss (left panel) and accuracy (right panel).
plt.figure(figsize=(15, 10))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.grid(True)
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.grid(True)
plt.show()
history.history.keys()  # notebook cell: list the recorded metric names
# Final evaluation on the held-out test set, then class predictions.
model.evaluate(X_test, Y_test)
pred = model.predict(X_test)
np.argmax(pred, axis=1), Y_test
# apply my test
I = cv2.imread("./my_test/referee.png")
I1 = cv2.imread("./my_test/referee1.png")
I2 = cv2.imread("./my_test/white.png")
I = cv2.resize(I, (80, 80))
I1= cv2.resize(I1, (80, 80))
I2= cv2.resize(I2, (80, 80))
cv2_imshow(I1)
cv2_imshow(I)
print(I.shape, I.reshape(1, -1).shape)
X_my_test = np.vstack([[I], [I1], [I2]])
Y_my_labels = np.vstack([np.array([0, 1, 0]),
np.array([0, 0, 1]),
np.array([0, 0, 1])])
pred = model.predict(X_my_test)
print(X_my_test.shape, Y_my_labels.shape)
np.argmax(pred, axis=1), pred
# Export the architecture as JSON and the weights as HDF5; these files feed
# the H5 -> ONNX conversion step described below.
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
Debido a que el núcleo del aprendizaje profundo es el etiquetado de datos, los datos etiquetados no se pueden proporcionar para que todos los preparen por sí mismos.
Para la preparación de datos, consulte " Video de fútbol AI (3) - Modelo de autoentrenamiento de detección de objetivos YOLOV7 "
Genera archivos "model.h5" "model.json"
2.2 Keras modelo H5 a Onnx
Consulte " Aprendizaje automático de IA (5) Keras h5 a razonamiento onnx C# ML "
3. Entrenamiento de la red YoloV7
Para la preparación de datos, consulte " Video de fútbol AI (3) - Modelo de autoentrenamiento de detección de objetivos YOLOV7 "
3.1 Modificación de elementos de configuración
1,clases_predefinidas.txt
“AZUL”, “BLANCO”, “ÁRBITRO”
Valores de índice: 0, 1, 2
2,coco.yaml
establecer nc: 3
Establecer nombres: ['AZUL', 'BLANCO', 'REFEREE']
3,yolov7.yaml
establecer ' nc:3 '
3.2 Instrucciones de etiquetado
La imagen del marco marca tres categorías, trata de marcar a todas las personas que aparecen, incluido el árbitro, el equipo azul y el equipo blanco.
4. Aplicación de ingeniería del modelo Keras
Para la aplicación modelo de YoloV7, consulte " Video de fútbol AI (3) - Modelo de autoentrenamiento de detección de objetivos YOLOV7 "
4.1 Definir la interfaz
public interface IClassific
{
    // Whether classification is enabled for this instance.
    bool UseClassic {
        get; set; }
    /// <summary>
    /// Load the classification model.
    /// </summary>
    void LoadModel();
    /// <summary>
    /// Predict.
    /// </summary>
    /// <param name="inputs">Batch of 80x80 RGB images</param>
    /// <returns>Classification result</returns>
    NDarray Predict(NDarray? inputs);
}
4.2 Modelo de aplicación
public class ClassificWithOnnx : IClassific
{
    // ONNX Runtime session; remains null until LoadModel() has been called.
    private InferenceSession? session;
    public ClassificWithOnnx(bool useClassic)
    {
        UseClassic = useClassic;
        if (useClassic)
            LoadModel();
    }
    public bool UseClassic {
        get; set; }
    /// <summary>
    /// <inheritdoc/>
    /// </summary>
    public void LoadModel()
    {
        // The converted Keras model is shipped under Assets/ next to the binary.
        session = new InferenceSession(Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "Assets/model.onnx"));
    }
    /// <summary>
    /// <inheritdoc/>
    /// </summary>
    public NDarray Predict(NDarray? inputs)
    {
        var inputTensor = inputs?.ToDenseTensor();
        // "input_11" is the name of the exported model's input layer; it must
        // match the ONNX graph produced by the H5 -> ONNX conversion.
        var input = new List<NamedOnnxValue> {
            NamedOnnxValue.CreateFromTensor<float>("input_11", inputTensor) };
        // NOTE(review): if LoadModel() was never called (UseClassic == false),
        // session?.Run(...) yields null and the chained ToList() throws —
        // confirm callers always construct with useClassic == true.
        var outputs = session?.Run(input).ToList().Last().AsEnumerable<float>().ToArray();
        var outputarray = np.array<float>(outputs!);
        // One softmax triple per input image; argmax yields the class index.
        var arr = outputarray.reshape(inputs!.shape.Dimensions[0], 3);
        arr = np.argmax(arr, axis: 1);
        return arr;
    }
}
El nombre de la primera capa de la red de inferencia es "input_11". Para ver el método y la definición del método estático, consulte " Aprendizaje automático de IA (5) Keras h5 a onnx C# ML razonamiento ".
4.3 Clasificación del personal
Basándose en " Soccer Video AI (2) - Detección de objetivos de jugadores y balones ", la persona se detecta primero, la imagen encuadernada de la persona se recorta y se pasa a la función de clasificación.
[Fact]
public void TestPlayerClassific()
{
    // Detect people in a frame, crop each bounding box to the 80x80 network
    // input size, then classify the whole batch of crops with the ONNX model.
    Mat roi;
    NDarray? rois = null;
    NDarray? labels = null;
    List<YoloPrediction> lst;
    var detector = new DetectorYolov7();
    var classificor = new ClassificWithOnnx(true);
    using (var mat = LoadImages.Load("field_2.jpg"))
    {
        lst = detector.Detect(mat);
        lst.ForEach(prediction => {
            var rect = prediction.Rectangle;
            // BUG FIX: the original referenced an undefined `frame`; the
            // loaded image variable is `mat`.
            roi = new Mat(mat, GetBoundSize(rect, mat)).Clone();
            roi = roi.Resize(new OpenCvSharp.Size(80, 80));
            // Collect the crop so the batch can be classified in one call.
            var ndarray = roi.ToNDarray();
            if (null == rois)
                rois = ndarray;
            else
                rois = np.concatenate((rois, ndarray));
        });  // BUG FIX: the ForEach lambda/call was never closed (missing `);`)
        // PERF FIX: predict once on the complete batch instead of re-running
        // Predict on the growing batch inside every loop iteration; the final
        // `labels` value is identical.
        labels = classificor.Predict(rois);
    }
    Assert.True(labels?.item<int>(0) >= 0);
}