Vehicle recognition using fully connected neural networks

import os
import zipfile
import random
import paddle
import numpy as np
import matplotlib.pyplot as plt
import PIL.Image as Image
from paddle.io import Dataset

(1) import os: imports the operating-system module, which provides functions for interacting with the file system, running system commands, and managing environment variables.
(2) import zipfile: imports the zipfile module, which provides tools for reading and extracting ZIP archives.
(3) import random: imports the random module, which provides functions for generating random numbers and for shuffling or sampling sequences.
(4) import paddle: imports PaddlePaddle, an open-source deep learning platform that provides a comprehensive set of tools for building and training machine learning models.
(5) import numpy as np: imports NumPy under the alias np; NumPy is a popular numerical computing library for Python that provides arrays, matrices, and related mathematical operations.
(6) import matplotlib.pyplot as plt: imports Matplotlib's plotting interface, which provides tools for creating visualizations such as line charts, scatter plots, and histograms.
(7) import PIL.Image as Image: imports the Image module of the Python Imaging Library (PIL/Pillow), which provides tools for opening and processing image files.
(8) from paddle.io import Dataset: imports the Dataset base class from the paddle.io package, which provides tools for loading, preprocessing, and iterating over datasets in PaddlePaddle.


'''
Parameter configuration
'''
train_parameters = {
    "input_size": [3, 120, 120],                             # shape of the input images (C, H, W)
    "class_dim": 3,                                          # number of classes
    "src_path": "/home/aistudio/data/data72920/Data.zip",    # path to the raw dataset archive
    "target_path": "/home/aistudio/work/",                   # directory to extract the archive into
    "train_list_path": "/home/aistudio/data/train.txt",      # path to train.txt
    "eval_list_path": "/home/aistudio/data/eval.txt",        # path to eval.txt
    "label_dict": {'0': 'car', '1': 'motorcycle', '2': 'truck'},  # label dictionary
    "num_epochs": 3,                                         # number of training epochs
    "train_batch_size": 8,                                   # batch size during training
    "learning_strategy": {                                   # optimizer-related configuration
        "lr": 0.1                                            # learning-rate hyperparameter
    },
    'skip_steps': 50,                                        # print metrics every N batches
    'save_steps': 500,                                       # save model parameters every N batches
    "checkpoints": "/home/aistudio/work/checkpoints"         # directory for saved checkpoints
}
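
The input_size entry fixes the flattened feature dimension that the first fully connected layer consumes later on; a quick sanity check (a minimal sketch, not part of the original pipeline):

# The fully connected network flattens each image, so the first Linear
# layer's in_features must equal C*H*W from the configured input size.
c, h, w = train_parameters["input_size"]
print(c * h * w)  # 3 * 120 * 120 = 43200, matching linear1 in MyDNN below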

1. Data preparation

(1) Decompress the original dataset
(2) Split it into a training set and a validation set at a fixed ratio
(3) Shuffle the order and generate data lists
(4) Define the data reader


#Decompress the original dataset
def unzip_data(src_path, target_path):
    '''
    Decompress the original dataset: extract the zip archive at src_path into the target_path directory
    '''
    if(not os.path.isdir(os.path.join(target_path, 'Data'))):
        z = zipfile.ZipFile(src_path, 'r')
        z.extractall(path=target_path)
        z.close()
        print('Dataset extraction complete')
    else:
        print('Files already exist')

This code defines a function called unzip_data that takes two parameters, src_path and target_path, and extracts the archive only if the Data directory does not already exist.

def get_data_list(target_path, train_list_path, eval_list_path):
    '''
    Generate the data lists
    '''
    data_dir = 'work/Data'     # path of the directory containing the extracted data
    all_data_list = []         # list that will hold "image path <tab> label" entries
    for im in os.listdir(data_dir):  # iterate over every file in the data directory
        img_path = os.path.join(data_dir, im)  # build the full image path by joining the directory and the file name
        img_label = str(int(im.split('_')[0]) - 1)  # the label is assumed to be the part of the file name before the first underscore
        all_data_list.append(img_path + '\t' + img_label + '\n')  # append a string of the image path, a tab, the label, and a newline

    # Shuffle the training list
    random.shuffle(all_data_list)  # randomize the order so the model cannot memorize the data sequence during training
    with open(train_list_path, 'a') as f1:   # open both list files in append mode; the with blocks ensure they are closed properly
        with open(eval_list_path, 'a') as f2:
            for ind, img_path_label in enumerate(all_data_list):  # iterate with the index so entries can be split by position
                # split into evaluation and training sets
                if ind % 10 == 0:  # every 10th entry is written to eval.txt (f2), the rest to train.txt (f1)
                    f2.write(img_path_label)
                else:
                    f1.write(img_path_label)
    print('Data list generation complete!')
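
As a hedged illustration of the file-naming convention this function assumes (the file names below are hypothetical; the real archive may differ):

# Files are assumed to be named "<class>_<id>.<ext>", where the 1-based
# class prefix is shifted down to a 0-based label string.
for name in ['1_0001.jpg', '2_0042.jpg', '3_0007.jpg']:  # hypothetical names
    label = str(int(name.split('_')[0]) - 1)
    print(name, '->', label)  # 1_... -> 0, 2_... -> 1, 3_... -> 2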

#Initialize parameters
src_path = train_parameters['src_path']
target_path = train_parameters['target_path']
train_list_path = train_parameters['train_list_path']
eval_list_path = train_parameters['eval_list_path']

#Extract the original data to the target path
unzip_data(src_path, target_path)

#Before generating the data lists, clear train.txt and eval.txt
with open(train_list_path, 'w') as f:
    f.seek(0)
    f.truncate()
with open(eval_list_path, 'w') as f:
    f.seek(0)
    f.truncate()

#Generate the data lists
get_data_list(target_path, train_list_path, eval_list_path)


This code defines a Paddle Dataset subclass named dataset, which loads images and labels from the training or evaluation lists generated by get_data_list.
class dataset(Dataset):
    def __init__(self, data_path, mode='train'):  # the initializer takes the data directory and a mode flag
        """
        Data reader
        :param data_path: directory containing the data lists
        :param mode: train or eval
        """
        super().__init__()  # call the initializer of the parent Dataset class
        # Instance variables: the directory holding the list files, the image file paths, and the corresponding labels.
        self.data_path = data_path
        self.img_paths = []
        self.labels = []
        # Depending on mode, read the training or evaluation list and populate the image-path and label lists.
        if mode == 'train':
            with open(os.path.join(self.data_path, "train.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
            for img_info in self.info:
                img_path, label = img_info.strip().split('\t')
                self.img_paths.append(img_path)
                self.labels.append(int(label))

        else:
            with open(os.path.join(self.data_path, "eval.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
            for img_info in self.info:
                img_path, label = img_info.strip().split('\t')
                self.img_paths.append(img_path)
                self.labels.append(int(label))

    # __getitem__ is called by the Paddle DataLoader to retrieve the image and label at a given index.
    def __getitem__(self, index):
        """
        Fetch one sample
        :param index: index of the file
        :return:
        """
        # First open the image file and fetch its label
        img_path = self.img_paths[index]  # file path of the image at the given index
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1)) / 255  # HWC to CHW, with pixel values scaled to [0, 1]
        label = self.labels[index]
        label = np.array([label], dtype="int64")
        return img, label

    def print_sample(self, index: int = 0):
        print("file name:", self.img_paths[index], "\tlabel:", self.labels[index])

    def __len__(self):
        return len(self.img_paths)

#Load the training data
train_dataset = dataset('/home/aistudio/data', mode='train')
train_loader = paddle.io.DataLoader(train_dataset,
                                    batch_size=train_parameters['train_batch_size'],
                                    shuffle=True
                                    )
#Load the evaluation data
eval_dataset = dataset('/home/aistudio/data', mode='eval')
eval_loader = paddle.io.DataLoader(eval_dataset,
                                   batch_size=train_parameters['train_batch_size'],
                                   shuffle=False
                                   )
                                   # batch_size sets the number of samples per batch; shuffle controls whether the data order is randomized.
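
A minimal sanity check on the loaders (a sketch, assuming the data lists above were generated successfully; not part of the original post):

# Inspect the first sample and the shape of one training batch.
train_dataset.print_sample(0)   # prints the first file name and label
for x, y in train_loader():
    print(x.shape, y.shape)     # expected: [8, 3, 120, 120] and [8, 1]
    break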

2. Model configuration

class MyDNN(paddle.nn.Layer):
    def __init__(self):
        super(MyDNN, self).__init__()
        self.linear1 = paddle.nn.Linear(in_features=3*120*120, out_features=4096)  # fully connected layer linear1: 3*120*120 inputs, 4096 outputs
        self.relu1 = paddle.nn.ReLU()  # ReLU activation relu1
        self.linear2 = paddle.nn.Linear(in_features=4096, out_features=2048)  # fully connected layer linear2: 4096 inputs, 2048 outputs
        self.relu2 = paddle.nn.ReLU()  # ReLU activation relu2
        self.linear3 = paddle.nn.Linear(in_features=2048, out_features=3)  # fully connected layer linear3: 2048 inputs, 3 outputs (one per class)

    def forward(self, input):  # forward defines the forward-pass logic executed when the network runs
        # input.shape: (8, 3, 120, 120)
        x = paddle.reshape(input, shape=[-1, 3*120*120])  # -1 means this dimension is inferred from the total number of elements and the remaining dimensions; at most one dimension may be -1
        # print(x.shape)
        x = self.linear1(x)  # pass the flattened data through linear1
        x = self.relu1(x)    # apply the ReLU activation
        x = self.linear2(x)
        x = self.relu2(x)
        y = self.linear3(x)
        return y  # return the logits

This code defines a custom neural network class MyDNN, which inherits from paddle.nn.Layer and overrides the __init__ and forward methods.
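
A quick shape check on a random batch (a minimal sketch, not from the original post):

# Feed a random batch through the untrained network to verify the shapes.
net = MyDNN()
dummy = paddle.randn([8, 3, 120, 120])  # batch of 8 fake images
out = net(dummy)
print(out.shape)  # expected: [8, 3], one logit per class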

3. Model training

def draw_process(title,color,iters,data,label):
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel(label, fontsize=20)
    plt.plot(iters, data,color=color,label=label) 
    plt.legend()
    plt.grid()
    plt.show()

This function draws the curve of a metric over the course of training. The parameters are:
title: the title of the plot.
color: the color of the curve.
iters: the x-axis values, i.e. the iteration counts.
data: the y-axis values, i.e. the metric recorded at each step.
label: the y-axis label, describing which metric is plotted.
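
For instance, with synthetic values (a hypothetical example, not from the original training run):

# Plot a fabricated loss curve just to show how draw_process is called.
demo_iters = [50, 100, 150, 200]
demo_loss = [1.2, 0.9, 0.7, 0.6]  # made-up loss values
draw_process("training loss", "red", demo_iters, demo_loss, "training loss")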

model = MyDNN()  # instantiate the model
model.train()    # switch the model to training mode
cross_entropy = paddle.nn.CrossEntropyLoss()  # define the loss function
optimizer = paddle.optimizer.Adam(learning_rate=train_parameters['learning_strategy']['lr'],
                                  parameters=model.parameters())  # define the optimizer

steps = 0  # step counter: the number of batches processed so far

Iters, total_loss, total_acc = [], [], []  # lists recording the step counts, losses, and accuracies during training

for epo in range(train_parameters['num_epochs']):  # loop over epochs; train_parameters['num_epochs'] is the number of epochs
    for _, data in enumerate(train_loader()):  # iterate over training batches
        steps += 1  # increment the batch counter
        x_data = data[0]  # unpack the batch into inputs and labels
        y_data = data[1]
        predicts = model(x_data)  # forward pass to obtain the predictions
        loss = cross_entropy(predicts, y_data)  # cross-entropy loss between predictions and ground-truth labels
        acc = paddle.metric.accuracy(predicts, y_data)  # accuracy of the predictions
        loss.backward()  # backpropagate to compute the gradients
        optimizer.step()  # update the model parameters
        optimizer.clear_grad()  # clear the gradients held by the optimizer
        if steps % train_parameters["skip_steps"] == 0:  # every skip_steps batches, record and print the metrics
            Iters.append(steps)
            total_loss.append(loss.numpy()[0])
            total_acc.append(acc.numpy()[0])  # append the current step, loss, and accuracy to their lists
            # print intermediate progress
            print('epo: {}, step: {}, loss is: {}, acc is: {}'\
                  .format(epo, steps, loss.numpy(), acc.numpy()))
        # save the model parameters
        if steps % train_parameters["save_steps"] == 0:
            save_path = train_parameters["checkpoints"]+"/"+"save_dir_" + str(steps) + '.pdparams'
            print('save model to: ' + save_path)
            paddle.save(model.state_dict(), save_path)
paddle.save(model.state_dict(), train_parameters["checkpoints"]+"/"+"save_dir_final.pdparams")
draw_process("training loss", "red", Iters, total_loss, "training loss")
draw_process("training acc", "green", Iters, total_acc, "training acc")  # plot the loss and accuracy curves of the training process

This code is the main program for training the model.
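If training is interrupted, a run can be resumed from one of the intermediate checkpoints saved above (a sketch; the step number 500 is only an example and such a file exists only if training ran that long):

# Restore model weights from an intermediate checkpoint before continuing training.
ckpt = paddle.load(train_parameters["checkpoints"] + "/save_dir_500.pdparams")  # hypothetical checkpoint
model.set_state_dict(ckpt)
model.train()  # back to training mode before resuming the loop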

4. Model evaluation

model_state_dict = paddle.load('work/checkpoints/save_dir_final.pdparams')   # load the trained model parameters
model_eval = MyDNN()
model_eval.set_state_dict(model_state_dict)
model_eval.eval()

# Iterate over the validation set, evaluate each batch, and record the per-batch accuracies in the accs list.
accs = []
for _, data in enumerate(eval_loader()):
    x_data = data[0]
    y_data = data[1]
    predicts = model_eval(x_data)
    acc = paddle.metric.accuracy(predicts, y_data)
    accs.append(acc.numpy()[0])
print('Accuracy of the model on the validation set:', np.mean(accs))  # compute and print the mean accuracy over the validation set
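
Note that np.mean over per-batch accuracies slightly over-weights a smaller final batch; a sample-weighted variant (a sketch, not in the original post):

# Weight each batch's accuracy by its actual batch size.
correct, total = 0.0, 0
for x_data, y_data in eval_loader():
    batch_acc = paddle.metric.accuracy(model_eval(x_data), y_data).numpy()[0]
    n = x_data.shape[0]
    correct += batch_acc * n
    total += n
print('sample-weighted accuracy:', correct / total)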

5. Model prediction

def load_image(img_path):
    '''
    Preprocess the image to be predicted
    '''
    img = Image.open(img_path)  # open the image to be predicted
    # print(img.mode)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize((120, 120), Image.ANTIALIAS)  # resize the image to 120x120 pixels
    img = np.array(img).astype('float32')  # convert the image to a NumPy array of dtype float32
    img = img.transpose((2, 0, 1)) / 255  # HWC to CHW, with pixel values normalized to [0, 1]
    return img

#Run prediction on a single image and output the result

model_state_dict = paddle.load('work/checkpoints/save_dir_final.pdparams')
model_predict = MyDNN()
model_predict.set_state_dict(model_state_dict)
model_predict.eval()
infer_path = 'work/车辆.png'
infer_img = Image.open(infer_path)
plt.imshow(infer_img)          # render the image
plt.show()                     # display it
# Preprocess the image to be predicted
infer_img = load_image(infer_path)
# print(type(infer_img))
infer_img = infer_img[np.newaxis, :, :, :]  # add a batch dimension: (1, 3, 120, 120)
infer_img = paddle.to_tensor(infer_img)
results = model_predict(infer_img)
print(results)
results = paddle.nn.functional.softmax(results)
print(results)
print("car: {:.2f}, motorcycle: {:.2f}, truck: {:.2f}".format(results.numpy()[0][0],
                                                              results.numpy()[0][1],
                                                              results.numpy()[0][2]))
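
To report a single predicted class instead of three probabilities, the label_dict from the configuration can be reused (a short sketch, not in the original post):

# Map the highest-probability class index back to its human-readable label.
pred_idx = int(np.argmax(results.numpy()[0]))
print('predicted class:', train_parameters['label_dict'][str(pred_idx)])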
