GAN learning process (3)

This is where the real work begins: instead of a string of noise, the generator's input is an image, and its output is the translated image.

Overall network model (the same pipeline applies to any GAN; a sketch of how the pieces combine follows this list):

inputs
generator
discriminator
loss
optimizer
train
test
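Before building each piece, here is a minimal sketch of how those stages combine into one training step (a preview of the train_step function defined later in this post; every name below is built in the sections that follow):

with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
    gen_output = generator(input_image, training=True)  # G translates the input image
    disc_real_output = discriminator([input_image, target], training=True)  # D scores the real pair
    disc_generated_output = discriminator([input_image, gen_output], training=True)  # D scores the generated pair
    gen_loss, _, _ = generator_loss(disc_generated_output, gen_output, target)
    disc_loss = discriminator_loss(disc_real_output, disc_generated_output)
# each optimizer updates only its own model's variables
generator_optimizer.apply_gradients(
    zip(gen_tape.gradient(gen_loss, generator.trainable_variables),
        generator.trainable_variables))
discriminator_optimizer.apply_gradients(
    zip(disc_tape.gradient(disc_loss, discriminator.trainable_variables),
        discriminator.trainable_variables))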

import tensorflow as tf
import os
import glob
from matplotlib import pyplot as plt
%matplotlib inline
import time
from IPython import display
imgs_path = glob.glob(r'D:\BaiduNetdiskDownload\cityscapes_data\train\*.jpg')
def read_jpg(path):  # read and decode one image file
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    return img
    
def normalize(input_image, input_mask):  # normalization function
    input_image = tf.cast(input_image, tf.float32)/127.5 - 1  # scale to [-1, 1]
    input_mask = tf.cast(input_mask, tf.float32)/127.5 - 1
    return input_image, input_mask

@tf.function
def load_image(image_path):  # load and preprocess one training pair
    image = read_jpg(image_path)  # decode to a tensor
    w = tf.shape(image)[1]  # image width
    w = w // 2  # integer-divide by 2 to split the side-by-side pair
    input_image = image[:, :w, :]  # dims: height, width, channels
    input_mask = image[:, w:, :]
    input_image = tf.image.resize(input_image, (256, 256))  # resize so the static shape is known instead of None
    input_mask = tf.image.resize(input_mask, (256, 256))
    
    if tf.random.uniform(()) > 0.5:  # flip both halves together with probability 0.5; uniform() samples from [0, 1)
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)

    input_image, input_mask = normalize(input_image, input_mask)  # normalize to [-1, 1]

    return input_mask, input_image  # returned as (generator input, target)

dataset = tf.data.Dataset.from_tensor_slices(imgs_path)  # build a dataset of file paths
train = dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)  # load and transform images in parallel
BATCH_SIZE = 64
BUFFER_SIZE = len(imgs_path)  # shuffle over the whole dataset if memory allows; batch size is 64
train_dataset = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)  # shuffle, then batch
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)  # the CPU prepares the next batch while the GPU trains on the current one
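To sanity-check the input pipeline, you can pull a single batch and confirm the shapes (a quick check added here for illustration, not part of the original code):

for example_input, example_target in train_dataset.take(1):
    print(example_input.shape, example_target.shape)  # expect (64, 256, 256, 3) for both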
imgs_path_test = glob.glob(r'D:\BaiduNetdiskDownload\cityscapes_data\val\*.jpg')
dataset_test = tf.data.Dataset.from_tensor_slices(imgs_path_test)
def load_image_test(image_path):  # no random flip for the test set
    image = read_jpg(image_path)
    w = tf.shape(image)[1]
    w = w // 2
    input_image = image[:, :w, :]
    input_mask = image[:, w:, :]
    input_image = tf.image.resize(input_image, (256,256))
    input_mask = tf.image.resize(input_mask, (256,256))
    
    input_image, input_mask = normalize(input_image, input_mask)

    return input_mask, input_image
dataset_test = dataset_test.map(load_image_test)  # apply the same loading to the test set
dataset_test = dataset_test.batch(BATCH_SIZE)
OUTPUT_CHANNELS = 3
def downsample(filters, size, apply_batchnorm=True):  # helper block: (number of filters, kernel size, whether to add BatchNorm; on by default)
#    initializer = tf.random_normal_initializer(0., 0.02)

    result = tf.keras.Sequential()  # build the block as a small Sequential model
    result.add(  # add the convolution layer
        tf.keras.layers.Conv2D(filters, size, strides=2, padding='same',
                               use_bias=False))  # in both the generator and the discriminator, strided convs extract features and shrink the image
    # strided convolutions instead of pooling: max-pooling in the generator makes gradients discontinuous and hurts training

    if apply_batchnorm:  # the generator's first layer skips BatchNorm
        result.add(tf.keras.layers.BatchNormalization())

    result.add(tf.keras.layers.LeakyReLU())  # LeakyReLU in downsampling blocks

    return result
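A quick way to verify the block does what the comments claim (an illustrative check, assuming a 256x256 RGB input):

down = downsample(64, 3)
print(down(tf.zeros([1, 256, 256, 3])).shape)  # stride 2 halves each side: (1, 128, 128, 64)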
def upsample(filters, size, apply_dropout=False):  # dropout here is not for overfitting; it adds diversity to the generated images
#    initializer = tf.random_normal_initializer(0., 0.02)

    result = tf.keras.Sequential()
    result.add(
        tf.keras.layers.Conv2DTranspose(filters, size, strides=2,
                                        padding='same',
                                        use_bias=False))

    result.add(tf.keras.layers.BatchNormalization())  # BatchNorm is always added here

    if apply_dropout:
        result.add(tf.keras.layers.Dropout(0.5))

    result.add(tf.keras.layers.ReLU())  # ReLU for upsampling, LeakyReLU for downsampling

    return result
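The transposed convolution doubles the spatial size; a symmetric check (illustrative, with an assumed 64x64 feature map):

up = upsample(128, 3)
print(up(tf.zeros([1, 64, 64, 256])).shape)  # (1, 128, 128, 128)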
def Generator():
    inputs = tf.keras.layers.Input(shape=[256,256,3])

    down_stack = [
        downsample(64, 3, apply_batchnorm=False), # no BatchNorm on G's first layer (bs, 128, 128, 64)
        downsample(128, 3), # (bs, 64, 64, 128)
        downsample(256, 3), # (bs, 32, 32, 256)
        downsample(512, 3), # (bs, 16, 16, 512)
        downsample(512, 3), # (bs, 8, 8, 512)
        downsample(512, 3), # (bs, 4, 4, 512)
        downsample(512, 3), # (bs, 2, 2, 512)
        downsample(512, 3), # (bs, 1, 1, 512)
    ]

    up_stack = [
        upsample(512, 3, apply_dropout=True), # (bs, 2, 2, 1024)
        upsample(512, 3, apply_dropout=True), # (bs, 4, 4, 512)
        upsample(512, 3, apply_dropout=True), # (bs, 8, 8, 512)
        upsample(512, 3), # (bs, 16, 16, 512)
        upsample(256, 3), # (bs, 32, 32, 256)
        upsample(128, 3), # (bs, 64, 64, 128)
        upsample(64, 3), # (bs, 128, 128, 64)
    ]

#    initializer = tf.random_normal_initializer(0., 0.02)
    last = tf.keras.layers.Conv2DTranspose(OUTPUT_CHANNELS, 3,
                                         strides=2,
                                         padding='same',
                                         activation='tanh') # (bs, 256, 256, 3)

    x = inputs  # x changes as it flows through the model; inputs stays fixed

    # Downsampling through the model
    skips = []  # U-Net skip connections: collect each downsampling output in a list
    for down in down_stack:  # iterate over the downsampling blocks
        x = down(x)
        skips.append(x)

    skips = reversed(skips[:-1])  # reverse so the deepest features come first; [:-1] drops the bottleneck output

    # Upsampling and establishing the skip connections
    for up, skip in zip(up_stack, skips):  # pair each upsampling block with its skip tensor
        x = up(x)
        x = tf.keras.layers.Concatenate()([x, skip])  # merge x with the stored skip features
# Concatenate stacks channels, unlike Add, which is a plain element-wise sum; x becomes e.g. 128x128x128 after the final merge
    x = last(x)

    return tf.keras.Model(inputs=inputs, outputs=x)

generator = Generator()
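Because the U-Net ends with a stride-2 transposed convolution back to 3 channels, the output shape matches the input; a quick check (illustrative, not in the original):

print(generator(tf.zeros([1, 256, 256, 3]), training=False).shape)  # (1, 256, 256, 3)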
LAMBDA = 10  # weight of the L1 term (a hyperparameter)

def generator_loss(disc_generated_output, gen_output, target):
    gan_loss = loss_object(tf.ones_like(disc_generated_output), disc_generated_output)  # adversarial loss: G wants D to output 1 for its images

    # L1 (mean absolute error) reconstruction loss
    l1_loss = tf.reduce_mean(tf.abs(target - gen_output))  # tf.reduce_mean reduces to a scalar

    total_gen_loss = gan_loss + (LAMBDA * l1_loss)  # LAMBDA increases the weight of the L1 term

    return total_gen_loss, gan_loss, l1_loss
def Discriminator():  # the discriminator takes a pair of images; the generated image should keep the structure of the input
    # the goal is only to change the coloring/appearance of the image
    # the paper uses a PatchGAN ("patch-D"): the image is divided into small patches and each patch is judged separately
#    initializer = tf.random_normal_initializer(0., 0.02)

    inp = tf.keras.layers.Input(shape=[256, 256, 3], name='input_image')
    tar = tf.keras.layers.Input(shape=[256, 256, 3], name='target_image')

    x = tf.keras.layers.concatenate([inp, tar]) # (bs, 256, 256, channels*2) = (bs, 256, 256, 6); lowercase concatenate is the functional form

    down1 = downsample(64, 3, False)(x) # (bs, 128, 128, 64)
    down2 = downsample(128, 3)(down1) # (bs, 64, 64, 128)
    down3 = downsample(256, 3)(down2) # (bs, 32, 32, 256)

    conv = tf.keras.layers.Conv2D(512, 3, strides=1,
                                  padding='same',
                                  use_bias=False)(down3) # (bs, 32, 32, 512): stride 1 with 'same' padding keeps the spatial size

    batchnorm1 = tf.keras.layers.BatchNormalization()(conv)

    leaky_relu = tf.keras.layers.LeakyReLU()(batchnorm1)

    last = tf.keras.layers.Conv2D(1, 3, strides=1)(leaky_relu) # (bs, 30, 30, 1)

    return tf.keras.Model(inputs=[inp, tar], outputs=last)  # one real/fake logit per patch

discriminator = Discriminator()
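Unlike a vanilla GAN discriminator, the PatchGAN outputs a 30x30 grid of logits, one per image patch, rather than a single scalar; this is easy to confirm (illustrative check):

out = discriminator([tf.zeros([1, 256, 256, 3]), tf.zeros([1, 256, 256, 3])], training=False)
print(out.shape)  # (1, 30, 30, 1)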
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)  # expects raw logits, i.e. unactivated outputs

def discriminator_loss(disc_real_output, disc_generated_output):
    real_loss = loss_object(tf.ones_like(disc_real_output), disc_real_output)
    # real pairs are labeled 1, generated pairs 0
    generated_loss = loss_object(tf.zeros_like(disc_generated_output), disc_generated_output)

    total_disc_loss = real_loss + generated_loss

    return total_disc_loss
generator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
discriminator_optimizer = tf.keras.optimizers.Adam(2e-4, beta_1=0.5)
def generate_images(model, test_input, tar):  # plotting helper; tar is the ground-truth image
    prediction = model(test_input, training=True)  # training=True: BatchNorm and Dropout stay in training mode
    plt.figure(figsize=(15, 15))  # create the figure

    display_list = [test_input[0], tar[0], prediction[0]]
    title = ['Input Image', 'Ground Truth', 'Predicted Image']

    for i in range(3):
        plt.subplot(1, 3, i+1)
        plt.title(title[i])
    # getting the pixel values between [0, 1] to plot it
        plt.imshow(display_list[i] * 0.5 + 0.5)  # images were normalized to [-1, 1], so *0.5 + 0.5 maps them back to [0, 1]
        # normalization effectively subtracted the mean 0.5 and divided by the std 0.5, so the inverse multiplies by 0.5 and adds 0.5
        plt.axis('off')
    plt.show()
EPOCHS = 110
@tf.function
def train_step(input_image, target, epoch):  # take one batch and update both models' variables
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:  # two models, so two gradient tapes
        gen_output = generator(input_image, training=True)

        disc_real_output = discriminator([input_image, target], training=True)
        disc_generated_output = discriminator([input_image, gen_output], training=True)  # the discriminator's verdict on the generated image

        gen_total_loss, gen_gan_loss, gen_l1_loss = generator_loss(disc_generated_output, gen_output, target)
        disc_loss = discriminator_loss(disc_real_output, disc_generated_output)
# compute gradients
    generator_gradients = gen_tape.gradient(gen_total_loss,
                                          generator.trainable_variables)
    discriminator_gradients = disc_tape.gradient(disc_loss,
                                               discriminator.trainable_variables)
# apply the gradients
    generator_optimizer.apply_gradients(zip(generator_gradients,
                                          generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(discriminator_gradients,
                                              discriminator.trainable_variables))
def fit(train_ds, epochs, test_ds):
    for epoch in range(epochs+1):
        if epoch % 10 == 0:  # visualize progress every 10 epochs
            for example_input, example_target in test_ds.take(1):  # take one batch of images from the test set
                generate_images(generator, example_input, example_target)
        print("Epoch: ", epoch)

        for n, (input_image, target) in train_ds.enumerate():
            if n%10 == 0:
                print('.', end='')
            train_step(input_image, target, epoch)
        print()
fit(train_dataset, EPOCHS, dataset_test)
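After training finishes, the same plotting helper can be reused to render a few test pairs (illustrative usage):

for example_input, example_target in dataset_test.take(3):
    generate_images(generator, example_input, example_target)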
