Quick notes: AVE (Variational Autoencoder), PyTorch + MNIST

Introduction

The theory behind VAEs is explained in detail in this blog post:

Variational Autoencoders (1): So that's what it is - Scientific Spaces

Loss function
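
The loss used here is the usual VAE objective: a reconstruction term (mean squared error, summed over all pixels) plus the KL divergence between the encoder's diagonal Gaussian $\mathcal{N}(\mu,\sigma^{2})$ and the standard normal prior, which has the closed form

$$D_{\mathrm{KL}}\left(\mathcal{N}(\mu,\sigma^{2})\,\|\,\mathcal{N}(0,1)\right) = -\frac{1}{2}\sum_{i}\left(1 + \log\sigma_{i}^{2} - \mu_{i}^{2} - \sigma_{i}^{2}\right)$$

This is exactly what the `KLD` term in the code computes; note that the network works with $\log\sigma^{2}$, stored in `logvar`.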

## six_Net.py: loss function of the variational autoencoder
## Mean squared error + KL divergence
import torch
import torch.nn as nn
import torch.nn.functional as F

reconstruction_function = nn.MSELoss(reduction='sum')   # sum over all pixels (replaces the deprecated size_average=False)

def loss_function(recon_x, x, mu, logvar):
    MSE = reconstruction_function(recon_x, x)

    ## KLD = -0.5 * sum( 1 + log(sigma^2) - mu^2 - sigma^2 )
    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
    KLD = torch.sum(KLD_element).mul_(-0.5)

    return MSE + KLD
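
As a quick sanity check (my own addition, not part of the original post), the loss can be called on dummy tensors shaped like the flattened MNIST batches used later: 784 pixels per image and a 20-dimensional latent space.

x = torch.rand(8, 784)          # fake "inputs"
recon_x = torch.rand(8, 784)    # fake "reconstructions"
mu = torch.zeros(8, 20)         # latent means
logvar = torch.zeros(8, 20)     # latent log-variances (i.e. sigma^2 = 1)

# With mu = 0 and sigma^2 = 1 the KL term is exactly 0, so only the summed MSE remains.
print(loss_function(recon_x, x, mu, logvar))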

Network

class AVE(nn.Module):
    def __init__(self):
        super(AVE, self).__init__()

        self.fc1 = nn.Linear(784, 400)

        self.fc21 = nn.Linear(400, 20)      # latent mean
        self.fc22 = nn.Linear(400, 20)      # latent log-variance

        self.fc3 = nn.Linear(20, 400)

        self.fc4 = nn.Linear(400, 784)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()    # element-wise 0.5 * logvar, then exponentiate: std = exp(0.5 * log(sigma^2))
        eps = torch.FloatTensor(std.size()).normal_()   # standard-normal noise of the same shape
        if torch.cuda.is_available():
            eps = torch.autograd.Variable(eps.cuda())
        else:
            eps = torch.autograd.Variable(eps)
        # multiply the standard-normal noise by the std and add the mean,
        # so the latent vector is distributed as N(mu, sigma^2)
        return eps.mul(std).add_(mu)

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return torch.tanh(self.fc4(h3))     # outputs in [-1, 1], matching the Normalize([0.5], [0.5]) preprocessing

    def forward(self, x):
        mu, logvar = self.encode(x)         # encode
        z = self.reparametrize(mu, logvar)  # reparameterize into a normal distribution
        return self.decode(z), mu, logvar   # decode; also return the mean and log-variance
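
To make the data flow explicit, here is a small shape check I added (with the definitions above in scope): a flattened 28×28 image goes in, and the network returns the reconstruction plus the latent mean and log-variance.

net = AVE()
dummy = torch.rand(4, 784)                   # a batch of 4 flattened 28x28 "images"
recon, mu, logvar = net(dummy)
print(recon.shape, mu.shape, logvar.shape)   # [4, 784], [4, 20], [4, 20]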

Train

import torch
import six_Net
import torch.nn as nn
import tqdm
import os

from torch.autograd import Variable
from torchvision.transforms import transforms
from torchvision.datasets import mnist
from torch.utils.data import DataLoader
from torch import optim
from torchvision.utils import save_image

## Hyperparameters
batch_size = 512
epochs = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

im_tfs = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

train_set = mnist.MNIST('./data', train=True, transform=im_tfs, download=False)    # set download=True on the first run
train_data = DataLoader(train_set, batch_size=batch_size, shuffle=True)

model = six_Net.AVE().to(device)

# x, _ = train_set[0]
# x = x.view(x.shape[0], -1)
# if torch.cuda.is_available():
#     x = x.cuda()
# x = Variable(x)
# a, mu, var = model(x)
# print('********************')
# print(var)

criterion = nn.MSELoss()    # not actually used below; the loss comes from six_Net.loss_function
optimizer = optim.Adam(model.parameters(), lr=1e-3)

def to_img(x):
    '''
    Convert the network output back into displayable images.
    '''
    x = 0.5 * (x + 1.)      # undo the [-1, 1] normalization
    x = x.clamp(0, 1)       # clip values to the [0, 1] range
    x = x.view(x.shape[0], 1, 28, 28)
    return x

## Start training
print('Start training')
for epoch in range(epochs):
    for im, _ in tqdm.tqdm(train_data):     # the labels are not needed
        im = im.view(im.shape[0], -1)
        im = Variable(im).to(device)

        ## Forward pass
        recon_im, mu, logvar = model(im)
        # loss = criterion(out, im) / im.shape[0]
        loss = six_Net.loss_function(recon_im, im, mu, logvar) / im.shape[0]

        ## Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch + 1) % 1 == 0:    # log and save reconstructions every epoch
        print()
        print(f'epoch {epoch + 1}, loss: {loss.item():.4f}')
        pic = to_img(recon_im.cpu().data)
        if not os.path.exists('./out'):
            os.mkdir('./out')
        save_image(pic, f'./out/image_{epoch + 1}.png')

os.makedirs('./params', exist_ok=True)      # make sure the checkpoint directory exists
torch.save(model.state_dict(), './params/AVE_encoder.pth')
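
A natural follow-up, sketched here rather than taken from the original script: since the decoder maps latent vectors to images, new digits can be generated by decoding random z ~ N(0, I) with the saved weights. The module name six_Net, the latent size 20, and the checkpoint path follow the code above.

import torch
import six_Net
from torchvision.utils import save_image

model = six_Net.AVE()
model.load_state_dict(torch.load('./params/AVE_encoder.pth', map_location='cpu'))
model.eval()

with torch.no_grad():
    z = torch.randn(64, 20)            # 64 random latent codes (latent dim = 20)
    samples = model.decode(z)          # decoder outputs lie in [-1, 1] because of tanh
    samples = 0.5 * (samples + 1.0)    # rescale to [0, 1] for saving
    save_image(samples.view(64, 1, 28, 28), './out/sampled_digits.png', nrow=8)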

Summary

Personally I find the loss quite high, around 60 per image (the loss is divided by the batch size), probably because it is the sum of MSE and KLD, with the MSE summed over all 784 pixels rather than averaged. Judging from the generated pictures, though, the results look quite decent.

Original post: blog.csdn.net/qq_42792802/article/details/126131759