PaddlePaddle 之VisualDL可视化工具

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/gentelyang/article/details/86567557

VisualDL介绍

VisualDL是一个面向深度学习任务设计的可视化工具,包含了scalar、参数分布、模型结构、图像可视化等功能。可以这样说:“所见即所得”。我们可以借助VisualDL来观察我们训练的情况,方便我们对训练的模型进行分析,改善模型的收敛情况。

之前我们使用的paddle.v2.plot接口,也可以观察训练的情况,但是只支持COST的折线图而已。而VisualDL可以支持以下这些功能:
--------------------- 
VisualDL底层采用C++编写,但是它在提供C++ SDK的同时,也支持Python SDK,我们主要是使用Python的SDK。顺便说一下,VisualDL除了支持PaddlePaddle之外,还支持包括pytorch、mxnet在内的大部分主流DNN平台。

VisualDL的安装

sudo pip install --upgrade visualdl

我用的是pip安装,其它安装方法没有尝试过。

简单使用VisualDL

# coding=utf-8
# Minimal VisualDL example: write two mirrored scalar curves to a log dir.
from visualdl import LogWriter

# LogWriter(dir, sync_cycle): `dir` is where the log data is stored;
# `sync_cycle` is the number of write operations between each flush
# of in-memory data to disk.
logw = LogWriter("./random_log", sync_cycle=10000)

# One scalar component per mode; the mode name ('train'/'test') labels
# the curve, and the tag ("scratch/scalar") identifies the component.
with logw.mode('train') as logger:
    scalar0 = logger.scalar("scratch/scalar")

with logw.mode('test') as logger:
    scalar1 = logger.scalar("scratch/scalar")

# Write 1000 data points: an increasing ramp for 'train' and the
# complementary decreasing ramp for 'test'.
for step in range(1000):
    fraction = step * 1. / 1000
    scalar0.add_record(step, fraction)
    scalar1.add_record(step, 1. - fraction)
--------------------- 
在IDE上run之后,会生成一个random_log的文件夹

然后在该目录下打开终端

visualdl --logdir ./random_log --port 8080

然后在浏览器中输入:http://127.0.0.1:8080

非常类似TensorBoard

在PaddlePaddle中使用VisualDL

创建三个组件:scalar、image、histogram,并指定存放日志的路径。

# coding=utf-8
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import paddle.v2 as paddle
from paddle.fluid.initializer import NormalInitializer
from paddle.fluid.param_attr import ParamAttr
from visualdl import LogWriter
from vgg import vgg16_bn_drop

# Create the VisualDL LogWriter and point it at this project's log directory.
logdir = "./data/tmp"
logwriter = LogWriter(logdir, sync_cycle=10)

# Scalar component for the training loss curve.
with logwriter.mode("train") as writer:
    loss_scalar = writer.scalar("loss")

# Scalar component for the training accuracy curve.
with logwriter.mode("train") as writer:
    acc_scalar = writer.scalar("acc")

# Number of samples collected before each image-display flush.
num_samples = 4
# Image components for visualizing the first conv layer's output and
# the corresponding input image.
with logwriter.mode("train") as writer:
    conv_image = writer.image("conv_image", num_samples, 1)
    input_image = writer.image("input_image", num_samples, 1)

# Histogram component (100 buckets) for the value distribution of the
# weight named "param1" — a parameter-distribution plot, not a plot of
# the model structure itself.
with logwriter.mode("train") as writer:
    param1_histgram = writer.histogram("param1", 100)


def train(use_cuda, learning_rate, num_passes, BATCH_SIZE=128):
    """Train VGG16 on CIFAR-10 and stream metrics to VisualDL.

    Args:
        use_cuda: run on GPU (CUDAPlace(0)) when True, otherwise on CPU.
        learning_rate: learning rate for the Momentum optimizer.
        num_passes: number of passes (epochs) over the training set.
        BATCH_SIZE: mini-batch size for the CIFAR-10 reader.
    """
    # CIFAR-10: 10 classes of 3x32x32 color images.
    class_dim = 10
    image_shape = [3, 32, 32]
    # Input layers: float image tensor and int64 class label.
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Network body; conv1 is the first conv output, fetched later for
    # image visualization in VisualDL.
    net, conv1 = vgg16_bn_drop(image)
    # Classifier head; the weight is explicitly named "param1" so its
    # values can be fetched and histogrammed below.
    predict = fluid.layers.fc(
        input=net,
        size=class_dim,
        act='softmax',
        param_attr=ParamAttr(name="param1", initializer=NormalInitializer()))

    # Cross-entropy loss, averaged over the batch.
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Per-batch sample count, fetched each step so the running accuracy
    # can be weighted by the actual batch size (the last batch may be
    # smaller than BATCH_SIZE).
    batch_size = fluid.layers.create_tensor(dtype='int64')
    # NOTE(review): removed a leftover Python-2-only debug statement
    # `print batch_size` here — it printed a graph Variable object (not
    # data) and broke the file under Python 3.
    batch_acc = fluid.layers.accuracy(input=predict, label=label, total=batch_size)

    # Momentum optimizer with L2 weight decay.
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(5 * 1e-5))

    optimizer.minimize(avg_cost)

    # Choose the device and initialize parameters.
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Batched CIFAR-10 training reader.
    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE)

    # Maps each raw sample onto the (image, label) input layers.
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    step = 0
    sample_num = 0
    # Handle to the "param1" weight variable so its values can be fetched.
    start_up_program = framework.default_startup_program()
    param1_var = start_up_program.global_block().var("param1")

    accuracy = fluid.average.WeightedAverage()
    # Outer loop over passes (epochs), inner loop over mini-batches.
    for pass_id in range(num_passes):
        accuracy.reset()
        for batch_id, data in enumerate(train_reader()):
            loss, conv1_out, param1, acc, weight = exe.run(fluid.default_main_program(),
                                                           feed=feeder.feed(data),
                                                           fetch_list=[avg_cost, conv1, param1_var, batch_acc,
                                                                       batch_size])
            accuracy.add(value=acc, weight=weight)
            pass_acc = accuracy.eval()

            # Restart the image-sampling cycle when a new one begins.
            if sample_num == 0:
                input_image.start_sampling()
                conv_image.start_sampling()
            # Ask each image component for a sample slot; -1 means "skip
            # this step". Both components must agree on the slot index.
            idx1 = input_image.is_sample_taken()
            idx2 = conv_image.is_sample_taken()
            assert idx1 == idx2
            idx = idx1
            if idx != -1:
                # First image of the batch, converted from CHW to HWC
                # layout for display.
                image_data = data[0][0]
                input_image_data = np.transpose(
                    image_data.reshape(image_shape), axes=[1, 2, 0])
                input_image.set_sample(idx, input_image_data.shape,
                                       input_image_data.flatten())
                # First channel of the first conv output for the same sample.
                conv_image_data = conv1_out[0][0]
                conv_image.set_sample(idx, conv_image_data.shape,
                                      conv_image_data.flatten())

                # Flush the image components once num_samples samples
                # have been collected, then start a new cycle.
                sample_num += 1
                if sample_num % num_samples == 0:
                    input_image.finish_sampling()
                    conv_image.finish_sampling()
                    sample_num = 0

            # Scalar curves and the parameter-distribution histogram.
            loss_scalar.add_record(step, loss)
            acc_scalar.add_record(step, acc)
            param1_histgram.add_record(step, param1.flatten())
            # Console progress log.
            print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(pass_acc))
            step += 1


if __name__ == '__main__':
    # Script entry point: run training on GPU for 5 passes.
    train(
        use_cuda=True,
        learning_rate=0.005,
        num_passes=5,
    )

猜你喜欢

转载自blog.csdn.net/gentelyang/article/details/86567557
今日推荐