【深度学习】【python】vae变分自编码器实现 中文注释版

【深度学习】【python】vae变分自编码器实现 中文注释版

“你的代码很不错,不过下一秒就是我的了.”

这里写图片描述

环境要求

  • python3.5
  • tensorflow 1.4
  • pytorch 0.2.0

运行结果:

这里写图片描述

这里写图片描述

本程序在深度学习框架方面只需要 TensorFlow(不需要 PyTorch);此外还依赖 numpy、matplotlib,以及提供 read_data_sets 的 input_data 模块。
程序如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Variational autoencoder (VAE).
reference: https://jmetzen.github.io/2015-11-27/vae.html
"""
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from input_data import read_data_sets

# Seed the NumPy and TensorFlow random number generators with a fixed value
# so that repeated runs draw the same random numbers.
np.random.seed(2017)
tf.set_random_seed(2017)

class VAE(object):
    """A simple variational autoencoder built with the TensorFlow 1.x graph API.

    Constructing an instance adds the complete model to the default graph:
    a fully connected encoder producing the mean and log-variance of a
    diagonal Gaussian over the latent code, a reparameterised sample of that
    Gaussian, a fully connected decoder producing Bernoulli means through a
    sigmoid, the loss, and an Adam training op.

    Attributes created by the constructor:
        x: float32 placeholder of shape [batch_size, input_dim].
        z_mean: encoder output, mean of the latent Gaussian.
        z_log_sigma_sq: encoder output, log-variance of the latent Gaussian.
        z: latent sample, z_mean + sigma * eps with eps ~ N(0, 1).
        x_reconstr_mean: decoder output (sigmoid), same width as the input.
        cost: scalar, batch mean of reconstruction loss plus KL term.
        lr: non-trainable variable holding the Adam learning rate (0.001).
        train_op: op that performs one Adam step on `cost`.
    """

    def __init__(self, input_dim=784, z_dim=50, batch_size=100, encoder_hidden_size=(500, 500),
                    decoder_hidden_size=(500, 500), act_fn=tf.nn.softplus):
        """Store the hyper-parameters and build the graph.

        Args:
            input_dim: int, width of one input vector.
            z_dim: int, dimensionality of the latent space.
            batch_size: int, fixed batch size baked into the placeholder and
                the noise tensor.
            encoder_hidden_size: list or tuple of ints, units of each encoder
                hidden layer (may be empty).
            decoder_hidden_size: list or tuple of ints, units of each decoder
                hidden layer (may be empty).
            act_fn: activation applied after every hidden layer.
        """
        self.input_dim = input_dim
        self.z_dim = z_dim
        self.batch_size = batch_size
        # Copy into fresh lists so the instance never shares state with the
        # caller's sequence (or with a default argument).
        self.encoder_hidden_size = list(encoder_hidden_size)
        self.decoder_hidden_size = list(decoder_hidden_size)
        self.act_fn = act_fn

        self._build_model()

    def _build_model(self):
        """Create the placeholder, encoder, sampler, decoder, loss and train op."""
        # Input placeholder with a fixed batch dimension.
        self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.input_dim])
        # Encoder: mean and log-variance of the latent Gaussian.
        self.z_mean, self.z_log_sigma_sq = self._encoder(self.x)
        # Standard normal noise for the reparameterisation trick.
        eps = tf.random_normal([self.batch_size, self.z_dim], mean=0.0, stddev=1.0)
        # z = mean + sigma * eps, where sigma = exp(0.5 * log(sigma^2)).
        # tf.multiply is used because tf.mul no longer exists in TensorFlow >= 1.0.
        sigma = tf.exp(0.5 * self.z_log_sigma_sq)
        self.z = tf.add(self.z_mean, tf.multiply(sigma, eps))

        # Decoder: mean of the Bernoulli distribution over the reconstructed input.
        self.x_reconstr_mean = self._decoder(self.z)

        with tf.name_scope("loss"):
            # Reconstruction loss: Bernoulli cross entropy summed over the
            # input dimensions; 1e-10 keeps the logarithms away from log(0).
            reconstr_loss = -tf.reduce_sum(self.x * tf.log(1e-10 + self.x_reconstr_mean) + \
                            (1.0 - self.x) * tf.log(1e-10 + 1.0 - self.x_reconstr_mean), axis=1)
            # Latent loss: KL divergence between the encoder Gaussian and N(0, I).
            latent_loss = -0.5 * tf.reduce_sum(1.0 + self.z_log_sigma_sq - tf.square(self.z_mean) - \
                                    tf.exp(self.z_log_sigma_sq), axis=1)
            # Average over the batch.
            self.cost = tf.reduce_mean(reconstr_loss + latent_loss)

        # Learning rate kept in a non-trainable variable.
        self.lr = tf.Variable(0.001, trainable=False)
        # Optimise every trainable variable currently in the graph.
        train_vars = tf.trainable_variables()
        self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.cost, var_list=train_vars)

    # Backward-compatible alias for the original, misspelled method name.
    _bulid_model = _build_model

    def _hidden_stack(self, inputs, n_in, hidden_sizes):
        """Apply one dense layer plus activation per entry of `hidden_sizes`.

        Layer i takes its variables from the scope "h<i>" inside whatever
        variable scope is active at call time.

        Returns:
            (h, n_out): the output tensor and its width. With no hidden
            layers this is simply (inputs, n_in).
        """
        h = inputs
        for i, size in enumerate(hidden_sizes):
            w, b = self._get_vars(n_in, size, name="h{0}".format(i))
            h = self.act_fn(tf.nn.xw_plus_b(h, w, b))
            # The next layer's input width is this layer's output width.
            n_in = size
        return h, n_in

    def _encoder(self, x, name="encoder"):
        """Map inputs to the mean and log-variance of the latent Gaussian.

        Returns:
            (z_mean, z_log_sigma_sq): two linear projections of the last
            hidden activation, each z_dim wide.
        """
        with tf.variable_scope(name):
            h, n_in = self._hidden_stack(x, self.input_dim, self.encoder_hidden_size)
            # z_mean = h * w + b
            w, b = self._get_vars(n_in, self.z_dim, name="out_mean")
            z_mean = tf.nn.xw_plus_b(h, w, b)
            # z_log_sigma_sq = h * w + b (separate weights)
            w, b = self._get_vars(n_in, self.z_dim, name="out_log_sigma")
            z_log_sigma_sq = tf.nn.xw_plus_b(h, w, b)
            return z_mean, z_log_sigma_sq

    def _decoder(self, z, name="decoder"):
        """Map latent codes to the Bernoulli means of the reconstructed input.

        Returns:
            x_reconstr_mean: sigmoid of a linear projection of the last
            hidden activation, input_dim wide.
        """
        with tf.variable_scope(name):
            h, n_in = self._hidden_stack(z, self.z_dim, self.decoder_hidden_size)
            w, b = self._get_vars(n_in, self.input_dim, name="out_mean")
            # The sigmoid squashes the outputs into (0, 1).
            x_reconstr_mean = tf.nn.sigmoid(tf.nn.xw_plus_b(h, w, b))
            return x_reconstr_mean

    def _get_vars(self, n_in, n_out, name=""):
        """Create (or fetch) the weight matrix and bias vector of one dense layer.

        Args:
            n_in: int, number of input units.
            n_out: int, number of output units.
            name: variable scope under which "w" and "b" are registered.

        Returns:
            (w, b): w has shape [n_in, n_out] with Xavier initialisation,
            b has shape [n_out] and is initialised to the constant 0.1.
        """
        with tf.variable_scope(name):
            w = tf.get_variable("w", [n_in, n_out], initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable("b", [n_out,], initializer=tf.constant_initializer(0.1))
            return w, b

if __name__ == "__main__":
    # Script entry point: train the VAE on MNIST, then save and show
    # (1) reconstructions of five test digits and (2) a grid of digits
    # decoded from a regular lattice in the 2-D latent space.
    import os  # local import: only this entry point needs it

    # Training settings.
    n_epochs = 30       # number of passes over the training set
    batch_size = 100    # examples per step; also the model's fixed batch dimension
    display_every = 1   # checkpoint and log every this many epochs
    img_side = 28       # MNIST images are reshaped to img_side x img_side for plotting

    # Base directory for outputs (first entry of sys.path).
    path = sys.path[0]
    model_dir = os.path.join(path, "model")
    results_dir = os.path.join(path, "results")
    # Make sure the output directories exist before anything is written to them.
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(results_dir, exist_ok=True)
    ckpt_path = os.path.join(model_dir, "model.ckpt")

    # Load the MNIST data set.
    mnist = read_data_sets("MNIST_data/", one_hot=True)

    with tf.Session() as sess:
        # Build the model with a 2-D latent space so it can be plotted as a grid.
        vae = VAE(input_dim=784, z_dim=2, batch_size=batch_size, encoder_hidden_size=[500, 500],
                    decoder_hidden_size=[500, 500], act_fn=tf.nn.softplus)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # To resume from an earlier run, call saver.restore(sess, save_path=ckpt_path) here.

        print("Start training...")
        # Number of full batches per epoch.
        total_batch = mnist.train.num_examples // batch_size
        for epoch in range(n_epochs):
            avg_cost = 0.0
            for _ in range(total_batch):
                # Labels are not needed for the autoencoder.
                batch_xs, _ = mnist.train.next_batch(batch_size)
                # One optimisation step; also fetch the batch cost.
                c, _ = sess.run([vae.cost, vae.train_op], feed_dict={vae.x: batch_xs})
                avg_cost += c / total_batch
            if epoch % display_every == 0:
                # Save a checkpoint and report the mean cost of this epoch.
                saver.save(sess, ckpt_path)
                print("\tEpoch {0}, cost {1}".format(epoch, avg_cost))

        # ---- Reconstructions of test digits ----
        x_sample, _ = mnist.test.next_batch(batch_size)
        # Encode and decode the test batch.
        x_reconstr = sess.run(vae.x_reconstr_mean, feed_dict={vae.x: x_sample})
        plt.figure(figsize=(8, 12))
        # Five rows: original on the left, reconstruction on the right.
        for i in range(5):
            plt.subplot(5, 2, 2*i + 1)
            plt.imshow(np.reshape(x_sample[i], (img_side, img_side)), vmin=0, vmax=1, cmap="gray")
            plt.title("Test input")
            plt.colorbar()
            plt.subplot(5, 2, 2*i + 2)
            plt.imshow(np.reshape(x_reconstr[i], [img_side, img_side]), vmin=0, vmax=1, cmap="gray")
            plt.title("Reconstruction")
            plt.colorbar()
        plt.tight_layout()
        plt.savefig(os.path.join(results_dir, "img_epoch{0}.jpg".format(n_epochs)))
        plt.show()

        # ---- Digits decoded from a lattice in latent space ----
        nx, ny = 20, 20
        # meshgrid returns (ny, nx) arrays; flattening them row-major yields
        # one (x, y) latent point per row of zs, ordered row by row.
        grid_x, grid_y = np.meshgrid(np.linspace(-3, 3, nx), np.linspace(-3, 3, ny))
        zs = np.concatenate((np.reshape(grid_x, [-1, 1]), np.reshape(grid_y, [-1, 1])), axis=1)

        n_points = zs.shape[0]
        xs_recon = np.zeros((n_points, img_side * img_side))
        # Decode the lattice batch by batch. vae.z carries the model's fixed
        # batch dimension, so a short final batch is zero-padded to
        # batch_size and the padding rows of the output are discarded.
        for start in range(0, n_points, batch_size):
            z_mu = zs[start:start + batch_size, :]
            n_valid = z_mu.shape[0]
            if n_valid < batch_size:
                padding = np.zeros((batch_size - n_valid, zs.shape[1]))
                z_mu = np.concatenate((z_mu, padding), axis=0)
            # Feed the latent codes directly, bypassing the encoder.
            x_mean = sess.run(vae.x_reconstr_mean, feed_dict={vae.z: z_mu})
            xs_recon[start:start + n_valid] = x_mean[:n_valid]

        # Tile the decoded digits into one large image. Lattice row 0
        # (smallest y) goes to the bottom of the canvas.
        canvas = np.zeros((img_side * ny, img_side * nx))
        for row in range(ny):
            for col in range(nx):
                tile = xs_recon[row * nx + col].reshape(img_side, img_side)
                canvas[(ny-row-1)*img_side:(ny-row)*img_side, col*img_side:(col+1)*img_side] = tile

        # Figure of 8 x 10 inches showing the tiled canvas.
        plt.figure(figsize=(8, 10))
        plt.imshow(canvas, origin="upper", vmin=0, vmax=1, interpolation='none', cmap='gray')
        plt.tight_layout()
        plt.savefig(os.path.join(results_dir, "rand_img_epoch{0}.jpg".format(n_epochs)))
        plt.show()

猜你喜欢

转载自blog.csdn.net/hanss2/article/details/81065385