# style_transfer_sol.py (annotated version)

import os
# The triple-quoted block below is an inert string literal (the original
# author's note about TF_CPP_MIN_LOG_LEVEL); it has no runtime effect.
# NOTE(review): per TensorFlow's documented levels, '0' is the default (show
# everything), '1' hides INFO, '2' hides INFO and WARNING, and '3' also hides
# ERROR -- the descriptions inside the note are shifted relative to that.
''' 
tensorflow 设置日志级别
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' #默认的显示等级，显示所有信息
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #只显示warning 和 error
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #只显示error
'''
# Must be set before tensorflow is imported. Level '2' suppresses INFO and
# WARNING messages from the TensorFlow C++ backend.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import time   # used to time the iterations; elapsed seconds are printed to the console

import numpy as np 
import tensorflow as tf 

import load_vgg_sol  # sibling module providing the VGG wrapper; import it without the .py suffix, otherwise: No module named 'load_vgg_sol.py'; 'load_vgg_sol' is not a package
import utils  # sibling module with the helper functions used throughout this file

def setup():
    """Create the 'checkpoints' and 'outputs' directories via utils.safe_mkdir.

    Both names are relative paths, so they are resolved against the current
    working directory.
    """
    for directory in ('checkpoints', 'outputs'):
        utils.safe_mkdir(directory)

class StyleTransfer(object):
    """Neural style transfer on top of a pre-trained VGG-19 (TensorFlow 1.x graph API).

    A single trainable variable, ``input_img``, is fed through VGG. It is
    first loaded with the content image and the style image to record their
    feature maps, and is then optimised so that its own features match them.

    Typical use: construct, call ``build()`` once, then ``train(n_iters)``.
    """

    # Checkpoint location shared by restore and save. The two used to point at
    # differently spelled directories ('style_transfer' vs 'style_stranfer'),
    # so a resumed run could never find the checkpoints it had written.
    CHECKPOINT_DIR = 'checkpoints/style_transfer'
    CHECKPOINT_PREFIX = CHECKPOINT_DIR + '/style_transfer'

    def __init__(self, content_img, style_img, img_width, img_height):
        """Load both images and set the hyperparameters.

        Both input images are resized to the dimensions expected for the
        generated image. Feel free to alter any hyperparameter here and see
        how it affects training.

        Args:
            content_img: content image, handed to ``utils.get_resized_image``
                (presumably a file path -- confirm against utils).
            style_img: style image, handled the same way.
            img_width: width of the generated image.
            img_height: height of the generated image.
        """
        self.img_width = img_width
        self.img_height = img_height
        self.content_img = utils.get_resized_image(content_img, img_width, img_height)
        self.style_img = utils.get_resized_image(style_img, img_width, img_height)
        # Starting point of the optimisation, derived from the content image.
        self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

        # VGG layer whose feature map is used for the content loss.
        self.content_layer = 'conv4_2'
        # VGG layers whose feature maps are used for the style loss.
        self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
        # Weights of the two terms in total = content_w * content + style_w * style.
        # NOTE(review): with style_w = 0 the style term does not contribute at
        # all, so the result only reconstructs the content image. The original
        # inline notes list 0.01 / 1 as the alternative setting.
        self.content_w = 1
        self.style_w = 0
        # Per-layer style weights, in the order of style_layers; deeper layers
        # are weighted more heavily (the original author's rationale: deeper
        # layers carry more texture information).
        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]
        # Step counter; trainable=False keeps the optimiser from treating it
        # as a parameter (it is advanced through minimize(global_step=...)).
        self.gstep = tf.Variable(0, dtype=tf.int32,
                                 trainable=False, name='global_step')
        self.lr = 2.0  # learning rate

    def create_input(self):
        """Create the single trainable image variable fed to VGG.

        One variable, ``input_img``, stands in for the content image, the
        style image and the generated image, because:
            1. all three have the same dimensions, and
            2. the same set of features has to be extracted from each of them.
        It is a variable rather than a placeholder because the generated image
        is itself what gets trained.

        Note: image height corresponds to the number of rows, not columns,
        hence the shape [1, height, width, 3].
        """
        with tf.variable_scope('input'):
            self.input_img = tf.get_variable(
                'in_img',
                shape=([1, self.img_height, self.img_width, 3]),
                dtype=tf.float32,
                initializer=tf.zeros_initializer())  # starts as all zeros

    def load_vgg(self):
        """Load the saved VGG-19 parameters and mean-centre both images.

        The network is built on ``input_img``, so each layer's output is a
        function of that variable.

        During training, VGG-19 mean-centered all images and found the mean
        pixels to be [123.68, 116.779, 103.939] along RGB dimensions. The same
        mean is subtracted from the content and style images here.
        """
        self.vgg = load_vgg_sol.VGG(self.input_img)
        self.vgg.load()
        self.content_img -= self.vgg.mean_pixels
        self.style_img -= self.vgg.mean_pixels

    def _content_loss(self, P, F):
        """Set ``self.content_loss`` for the content layer.

        The loss is sum((F - P)^2) / (4 * P.size). This deliberately uses the
        coefficient from the assignment handout rather than the 0.5 defined
        in the paper.

        Args:
            P: content representation of the content image. ``P.size`` is
                read, so it must expose ``.size`` (``losses()`` passes the
                array returned by ``sess.run``).
            F: content representation of the generated image (``losses()``
                passes the VGG layer tensor).
        """
        self.content_loss = tf.reduce_sum((F - P) ** 2) / (4.0 * P.size)

    def _gram_matrix(self, F, N, M):
        """Create and return the gram matrix for tensor F.

        Args:
            F: feature map; reshaped to (M, N) before the product.
            N: number of filters.
            M: height times width of the feature map.

        Returns:
            The N x N matrix transpose(F) * F.
        """
        F = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(F), F)

    def _single_style_loss(self, a, g):
        """Calculate the style loss at a certain layer (E_l in the paper).

        Args:
            a: feature representation of the style image at that layer.
            g: feature representation of the generated image at that layer.
            Both are feature maps, not gram matrices.

        Returns:
            sum((G - A)^2 / (2 * N * M)^2), with A and G the gram matrices of
            a and g, using the same coefficient as the paper.
        """
        N = a.shape[3]  # number of filters
        M = a.shape[1] * a.shape[2]  # height times width of the feature map
        A = self._gram_matrix(a, N, M)
        G = self._gram_matrix(g, N, M)
        return tf.reduce_sum((G - A) ** 2 / ((2 * N * M) ** 2))

    def _style_loss(self, A):
        """Set ``self.style_loss`` to the weighted sum of per-layer style losses.

        Args:
            A: style-image feature maps, one per entry of ``self.style_layers``
                and in the same order (five layers with the default settings).
        """
        layer_losses = [
            self._single_style_loss(style_feat, getattr(self.vgg, layer_name))
            for style_feat, layer_name in zip(A, self.style_layers)
        ]
        self.style_loss = sum(
            weight * loss
            for weight, loss in zip(self.style_layer_w, layer_losses))

    def losses(self):
        """Build the content, style and total loss tensors.

        For each reference image a short-lived session loads the image into
        ``input_img`` and evaluates the relevant VGG layers, which freezes
        the reference features as concrete arrays. The losses then compare
        those arrays against the (still symbolic) layers of ``input_img``.
        """
        with tf.variable_scope('losses'):
            with tf.Session() as sess:
                # Assign the content image to the input variable.
                sess.run(self.input_img.assign(self.content_img))
                gen_img_content = getattr(self.vgg, self.content_layer)
                content_img_content = sess.run(gen_img_content)
            self._content_loss(content_img_content, gen_img_content)

            with tf.Session() as sess:
                sess.run(self.input_img.assign(self.style_img))
                style_layers = sess.run(
                    [getattr(self.vgg, layer) for layer in self.style_layers])
            self._style_loss(style_layers)

            # Weighted combination of the content and style terms.
            self.total_loss = (self.content_w * self.content_loss
                               + self.style_w * self.style_loss)

    def optimize(self):
        """Create the training op.

        Uses AdamOptimizer (unlike the original paper, per the author's
        note) and advances ``gstep`` on every update.
        """
        self.opt = tf.train.AdamOptimizer(self.lr).minimize(
            self.total_loss, global_step=self.gstep)

    def create_summary(self):
        """Create scalar summaries for the three losses and merge them.

        The merged op is written during training so the loss curves can be
        inspected in TensorBoard.
        """
        with tf.name_scope('summaries'):
            tf.summary.scalar('content loss', self.content_loss)
            tf.summary.scalar('style loss', self.style_loss)
            tf.summary.scalar('total loss', self.total_loss)
            # One op that evaluates all summaries registered above.
            self.summary_op = tf.summary.merge_all()

    def build(self):
        """Assemble the graph: input, VGG, losses, optimiser, summaries."""
        self.create_input()
        self.load_vgg()
        self.losses()
        self.optimize()
        self.create_summary()

    @staticmethod
    def _log_interval(index):
        """Return how often (in steps) progress is reported at step ``index``.

        Every step for the first 5 steps, every 10 steps up to step 20, and
        every 20 steps afterwards.
        """
        if index >= 20:
            return 20
        if index >= 5:
            return 10
        return 1

    def train(self, n_iters):
        """Optimise the generated image, logging and checkpointing as it goes.

        Training resumes from the latest checkpoint in ``CHECKPOINT_DIR`` if
        one exists; otherwise it starts from ``initial_img``.

        Args:
            n_iters: step number to stop at (the loop runs from the restored
                global step up to, but not including, ``n_iters``).
        """
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Directory name kept exactly as in the original (including its
            # spelling) so existing TensorBoard logs stay in one place.
            writer = tf.summary.FileWriter('graphs/style_stranfer', sess.graph)
            try:
                # Start from the noise image; a restored checkpoint, if any,
                # overrides this below.
                sess.run(self.input_img.assign(self.initial_img))

                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(self.CHECKPOINT_DIR)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)

                initial_step = self.gstep.eval()

                start_time = time.time()
                for index in range(initial_step, n_iters):
                    skip_step = self._log_interval(index)

                    sess.run(self.opt)
                    if (index + 1) % skip_step != 0:
                        continue

                    gen_image, total_loss, summary = sess.run(
                        [self.input_img, self.total_loss, self.summary_op])

                    # Add back the mean pixels subtracted in load_vgg().
                    gen_image = gen_image + self.vgg.mean_pixels
                    writer.add_summary(summary, global_step=index)
                    print('Step {}\n   Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))
                    print('   Loss: {:5.1f}'.format(total_loss))
                    print('   Took: {} seconds'.format(time.time() - start_time))
                    start_time = time.time()

                    # Note: the file is named after the 0-based index, while
                    # the printed step number is index + 1.
                    filename = 'outputs/%d.png' % (index)
                    utils.save_image(filename, gen_image)

                    if (index + 1) % 20 == 0:
                        saver.save(sess, self.CHECKPOINT_PREFIX, index)
            finally:
                # Flush and release the event file even if training fails.
                writer.close()

if __name__ == '__main__':
    # Script entry point: create the output directories, build the graph for
    # a 333 x 250 (width x height) image, then run training up to step 300.
    setup()
    machine = StyleTransfer(
        content_img='content/transp.jpg',
        style_img='styles/guernica.jpg',
        img_width=333,
        img_height=250,
    )
    machine.build()
    machine.train(n_iters=300)
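# I spent a few hours today annotating this code so that it is easy to look
# up later; keeping it here as a record.
# style_transfer_sol.py (annotated version)
#
# You may also like (site navigation text from the source page)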