(2) Building, saving and using the PaddlePaddle neural network model (VGG16 model, CIFAR data set)

This section uses PaddlePaddle to build a VGG16 model and train it on the CIFAR dataset, then saves the trained model, loads it again, and uses it to predict the category of a real picture.

(1) The VGG model is a well-known model that followed AlexNet. It further widens and deepens the network structure. Its core is 5 groups of convolution operations, with max-pooling between groups to reduce the spatial dimensions. Each group uses several consecutive 3x3 convolutions with the same number of convolution kernels, and depending on how many convolutions are stacked the model comes in 11-, 13-, 16-, and 19-layer variants:

insert image description here

The CIFAR-10 dataset contains images in 10 categories. Each image is a 32x32-pixel square color image, much smaller than the images in the ImageNet dataset, so some adaptation is needed for CIFAR-10 images. The convolution part introduces a batch normalization (BN) layer and a random-drop (Dropout) operation, whose specific roles are:

insert image description here

(2) VGG16 model construction:

# 模型训练与保存
import os
import shutil
import paddle
paddle.enable_static()
import paddle.dataset.cifar as cifar
import paddle.fluid as fluid

# 定义VGG16神经网络
def vgg16(input, class_dim=10):
    """Build a VGG16-style classification network.

    The network is 13 convolution layers arranged in five groups (each group
    followed by max-pooling) plus three fully connected layers.

    Args:
        input: Image variable the first convolution group is applied to.
        class_dim: Number of output classes (size of the softmax layer).

    Returns:
        The output variable of the final softmax fully connected layer.
    """

    def conv_block(conv, num_filter, groups):
        """Apply ``groups`` 3x3 ReLU convolutions, then 2x2 stride-2 max-pooling."""
        for _ in range(groups):
            conv = fluid.layers.conv2d(
                input=conv,
                num_filters=num_filter,
                filter_size=3,
                stride=1,
                padding=1,
                act='relu',
            )
        return fluid.layers.pool2d(
            input=conv, pool_size=2, pool_type='max', pool_stride=2)

    # (number of filters, number of stacked convolutions) for the five groups.
    group_specs = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    features = input
    for num_filter, groups in group_specs:
        features = conv_block(features, num_filter, groups)

    # Classifier head. Batch normalization and dropout are added here to
    # regularize the model: dropout randomly discards units so the network
    # becomes sparser and is less prone to overfitting.
    hidden = fluid.layers.fc(input=features, size=512)
    hidden = fluid.layers.dropout(x=hidden, dropout_prob=0.5)
    hidden = fluid.layers.fc(input=hidden, size=512)
    hidden = fluid.layers.batch_norm(input=hidden, act='relu')
    hidden = fluid.layers.dropout(x=hidden, dropout_prob=0.5)
    return fluid.layers.fc(input=hidden, size=class_dim, act='softmax')

(3) Define the input layer for the CIFAR data, obtain the loss function, clone the test program, define the optimizer, load the CIFAR data, define the CPU executor, define the data feeder that feeds data to the model, train and test the model, and create the directory in which the model is saved. The code is shown below:

# 模型训练与保存
import os
import shutil
import paddle
paddle.enable_static()
import paddle.dataset.cifar as cifar
import paddle.fluid as fluid

# 定义VGG16神经网络
def vgg16(input, class_dim=10):
    """Build a VGG16-style classification network.

    The network is 13 convolution layers arranged in five groups (each group
    followed by max-pooling) plus three fully connected layers.

    Args:
        input: Image variable the first convolution group is applied to.
        class_dim: Number of output classes (size of the softmax layer).

    Returns:
        The output variable of the final softmax fully connected layer.
    """

    def conv_block(conv, num_filter, groups):
        """Apply ``groups`` 3x3 ReLU convolutions, then 2x2 stride-2 max-pooling."""
        for _ in range(groups):
            conv = fluid.layers.conv2d(
                input=conv,
                num_filters=num_filter,
                filter_size=3,
                stride=1,
                padding=1,
                act='relu',
            )
        return fluid.layers.pool2d(
            input=conv, pool_size=2, pool_type='max', pool_stride=2)

    # (number of filters, number of stacked convolutions) for the five groups.
    group_specs = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    features = input
    for num_filter, groups in group_specs:
        features = conv_block(features, num_filter, groups)

    # Classifier head. Batch normalization and dropout are added here to
    # regularize the model: dropout randomly discards units so the network
    # becomes sparser and is less prone to overfitting.
    hidden = fluid.layers.fc(input=features, size=512)
    hidden = fluid.layers.dropout(x=hidden, dropout_prob=0.5)
    hidden = fluid.layers.fc(input=hidden, size=512)
    hidden = fluid.layers.batch_norm(input=hidden, act='relu')
    hidden = fluid.layers.dropout(x=hidden, dropout_prob=0.5)
    return fluid.layers.fc(input=hidden, size=class_dim, act='softmax')

# Input layer. CIFAR-10 pictures are 32x32-pixel, 3-channel color images:
# 60000 images in 10 classes (airplane, automobile, ...), 6000 per class.
# The leading None leaves the batch dimension unspecified.
image = fluid.data(name='image', shape=[None, 3, 32, 32], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')

# Build the classifier with 10 output classes.
model = vgg16(image, 10)

# Loss (mean cross-entropy) and top-1 accuracy.
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label, k=1)

# Clone the test program here, before the optimizer is attached to the main
# program, so that evaluation runs only the forward pass.
test_program = fluid.default_main_program().clone(for_test=True)

# Optimizer: SGD with momentum.
optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=1e-3,
                                              momentum=0.9)
opts = optimizer.minimize(avg_cost)

# CIFAR-10 train/test readers, batched 32 samples at a time.
train_reader = paddle.batch(cifar.train10(), batch_size=32)
test_reader = paddle.batch(cifar.test10(), batch_size=32)

# Executor running on the CPU (use fluid.CUDAPlace(0) to run on a GPU).
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize the parameters.
exe.run(fluid.default_startup_program())

# Feeder that converts reader batches into the image/label inputs.
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Train and test (one pass here; raise the range to train for more passes).
for pass_id in range(1):
    # Training
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc = exe.run(program=fluid.default_main_program(),
                                        feed=feeder.feed(data),
                                        fetch_list=[avg_cost, acc])
        # Print progress every 100 batches.
        if batch_id % 100 == 0:
            print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %
                  (pass_id, batch_id, train_cost[0], train_acc[0]))

    # Testing
    test_accs = []
    test_costs = []
    test_sizes = []
    for data in test_reader():
        test_cost, test_acc = exe.run(program=test_program,
                                      feed=feeder.feed(data),
                                      fetch_list=[avg_cost, acc])
        test_accs.append(test_acc[0])
        test_costs.append(test_cost[0])
        test_sizes.append(len(data))
    # Average over samples, not over batches: the last batch can be smaller
    # than batch_size, so each batch mean is weighted by its sample count.
    total_samples = sum(test_sizes)
    test_cost = sum(c * n for c, n in zip(test_costs, test_sizes)) / total_samples
    test_acc = sum(a * n for a, n in zip(test_accs, test_sizes)) / total_samples
    print('Test:%d, Cost:%0.5f, Accuracy:%0.5f' % (pass_id, test_cost, test_acc))

    # Save the inference model.
    # NOTE: the backslash is a path separator only on Windows. The inference
    # snippet loads from this same literal, so change both together.
    save_path = 'model\\persistable_model_01'
    # Remove the model files of a previous run.
    shutil.rmtree(save_path, ignore_errors=True)
    # Create the model directory; exist_ok avoids a crash when the removal
    # above failed silently and the directory is still there.
    os.makedirs(save_path, exist_ok=True)
    # Export the forward network with `image` as input and `model` as output.
    fluid.io.save_inference_model(dirname=save_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[model],
                                  executor=exe)

Run result:

insert image description here

The model is also successfully generated:

insert image description here

(4) Load the model trained above and use it to predict the category of a car photo:

# 调用上面训练得到模型,并使用
import paddle.fluid as fluid
from PIL import Image
import numpy as np
import paddle
paddle.enable_static()

# Create an executor that runs on the CPU.
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# Directory the inference model was saved to.
save_path = 'model\\persistable_model_01'
# Load the inference program, the names of its input variables and its
# output (classifier) variables from the saved model.
infer_program, feeded_var_names, target_var = fluid.io.load_inference_model(
    dirname=save_path,
    executor=exe,
)
# 预处理图片
def load_image(file):
    """Load a picture and convert it into the network's input format.

    The picture is converted to 3-channel RGB, resized to 32x32 pixels,
    rearranged from HWC to CHW order, scaled by 1/255 and given a leading
    batch axis.

    Args:
        file: Path (or file object) of the image, as accepted by
            ``Image.open``.

    Returns:
        A float32 ``numpy`` array of shape (1, 3, 32, 32).
    """
    # The context manager closes the underlying file once the pixel data has
    # been copied out by convert().
    with Image.open(file) as src:
        # 4-channel pictures (extra alpha channel) would break the model
        # input, so always convert to 3-channel RGB.
        im = src.convert("RGB")
    # Image.LANCZOS is the high-quality filter formerly also named
    # Image.ANTIALIAS; that alias was removed in Pillow 10.
    im = im.resize((32, 32), Image.LANCZOS)
    im = np.array(im).astype(np.float32)
    # PIL stores pixels as H (height), W (width), C (channel); PaddlePaddle
    # expects CHW, so move the channel axis to the front.
    im = im.transpose((2, 0, 1))
    # Scale the 0-255 pixel values by 1/255.
    im = im / 255.0
    # Add the batch dimension on axis 0.
    im = np.expand_dims(im, axis=0)
    return im

# Load the picture to classify.
img = load_image('F:\\PyQt_Serial_Assistant_Drive_Detect\\Friuts_Classify\\car.png')

# Run the inference program on the picture.
feed_dict = {feeded_var_names[0]: img}
result = exe.run(program=infer_program,
                 feed=feed_dict,
                 fetch_list=target_var)

# Scores of the single input picture from the first fetched output.
probs = result[0][0]
# np.argsort orders the indices by ascending score, so the last index is the
# label with the highest score.
lab = np.argsort(probs)[-1]

# Class names indexed by label.
names = ['飞机', '汽车', '鸟', '猫', '鹿',
         '狗', '青蛙', '马', '船', '卡车']

# Print the predicted label, its class name and its score.
print('预测结果标签为:%d, 名称为:%s, 概率为:%f' % (lab, names[lab], probs[lab]))

Image to be predicted:

insert image description here

Successful prediction:

insert image description here

Predict one more picture of a dog:

insert image description here

Prediction result:

insert image description here

Guess you like

Origin blog.csdn.net/K_AAbb/article/details/127366419