自定义ResNet神经网络-Tensorflow【cifar100分类数据集】

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # 放在 import tensorflow as tf 之前才有效

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets, Sequential
#==========================================自定义ResNet神经网络：开始==========================================
# 两层的残差学习单元 BasicBlock [(3×3)-->(3×3)]形状，如果是三层的BasicBlock，形状则为：[(1×1)-->(3×3)-->(1×1)]
# stride>1时(比如stride=2)，则通过该层Layer后的FeatureMap的大小减半。strides: An integer or tuple/list of 2 integers
class BasicBlock(layers.Layer):
    """Two-convolution residual unit: ``relu(F(x) + shortcut(x))``.

    ``F(x)`` is Conv3x3(stride) -> BatchNorm -> ReLU -> Conv3x3(1) -> BatchNorm,
    both convolutions using ``padding='same'``.

    The shortcut is the plain identity when the block changes neither the
    spatial size nor the channel count. Otherwise it is a 1x1 convolution that
    brings the input to the shape of ``F(x)`` so that the two can be added.

    Args:
        filter_count: Number of output channels of both 3x3 convolutions.
        stride: Stride of the first 3x3 convolution (an int or a tuple/list of
            two ints). Any value other than ``1`` sub-samples the feature map.
    """

    def __init__(self, filter_count, stride=1):
        super(BasicBlock, self).__init__()
        self._filter_count = filter_count
        # ---------------- F(x) branch ----------------
        # First conv: with padding='same' and stride != 1 the spatial size is
        # divided by the stride.
        self.conv1 = layers.Conv2D(filters=filter_count, kernel_size=[3, 3], strides=stride, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        # Second conv: padding='same' and stride 1 keep the spatial size.
        self.conv2 = layers.Conv2D(filters=filter_count, kernel_size=[3, 3], strides=1, padding='same')
        self.bn2 = layers.BatchNormalization()
        # ---------------- shortcut branch ----------------
        if stride != 1:
            # F(x) is sub-sampled, so the input needs the same sub-sampling
            # (and channel count) before the addition.
            self.identity_layer = layers.Conv2D(filters=filter_count, kernel_size=[1, 1], strides=stride)
            self._shortcut_is_passthrough = False
        else:
            # Spatial size is unchanged; start with a parameter-free identity.
            # build() swaps in a projection if the channel counts differ.
            self.identity_layer = lambda x: x
            self._shortcut_is_passthrough = True

    def build(self, input_shape):
        """Replace the identity shortcut by a 1x1 projection when channels differ.

        Without this, a stride-1 block whose ``filter_count`` is not equal to
        the number of input channels would fail at the element-wise addition.
        """
        in_channels = tf.TensorShape(input_shape)[-1]
        if (self._shortcut_is_passthrough
                and in_channels is not None
                and in_channels != self._filter_count):
            self.identity_layer = layers.Conv2D(filters=self._filter_count, kernel_size=[1, 1], strides=1)
            self._shortcut_is_passthrough = False
        super(BasicBlock, self).build(input_shape)

    def call(self, inputs, training=None):
        """Forward pass.

        Args:
            inputs: Feature map tensor laid out as [batch, height, width, channels].
            training: Forwarded to both BatchNormalization layers so they use
                batch statistics when training and moving averages otherwise.

        Returns:
            The activated sum of the residual branch and the shortcut branch.
        """
        # ---------------- F(x) branch ----------------
        F_out = self.conv1(inputs)
        F_out = self.bn1(F_out, training=training)
        F_out = self.relu(F_out)
        F_out = self.conv2(F_out)
        F_out = self.bn2(F_out, training=training)
        # ---------------- shortcut branch ----------------
        # Not given `training`: the shortcut may be a plain Python lambda.
        identity_out = self.identity_layer(inputs)
        # ---------------- H(x) = F(x) + x ----------------
        basic_block_output = layers.add([F_out, identity_out])
        basic_block_output = tf.nn.relu(basic_block_output)

        return basic_block_output


# 由多个BasicBlock组成的ResidualBlock
class ResidualBlock:
    """Factory for one ResNet stage: a ``Sequential`` stack of ``BasicBlock`` units.

    Args:
        filter_count: Number of output channels of every unit in the stage.
        residualBlock_size: Total number of ``BasicBlock`` units in the stage.
        stride: Stride of the first unit; all following units use stride 1.
    """

    def __init__(self, filter_count, residualBlock_size, stride=1):
        self.filter_count = filter_count
        self.residualBlock_size = residualBlock_size
        self.stride = stride

    def __call__(self):
        """Build and return the stage as a new ``Sequential`` model."""
        residualBlock = Sequential()
        # The first unit carries the stage stride (it sub-samples when stride != 1).
        residualBlock.add(BasicBlock(self.filter_count, stride=self.stride))
        # Every remaining unit must be its own BasicBlock instance: adding one
        # instance several times would reuse a single layer (and its weights)
        # for all of those positions instead of stacking independent units.
        for _ in range(1, self.residualBlock_size):
            residualBlock.add(BasicBlock(self.filter_count, stride=1))
        return residualBlock

# 由多个ResidualBlock组成的ResidualNet
# residualBlock_size_list：[2, 2, 2, 2]  表示该ResidualNet含有4个ResidualBlock，每个ResidualBlock包含2个BasicBlock
# residualBlock_size_list：[3, 4, 6, 3]  表示该ResidualNet含有4个ResidualBlock，第1个ResidualBlock包含3个BasicBlock，第2个ResidualBlock包含4个BasicBlock，第3个ResidualBlock包含6个BasicBlock，第4个ResidualBlock包含3个BasicBlock
class ResidualNet(keras.Model):
    """ResNet-style classifier: stem -> four residual stages -> global pool -> dense.

    Args:
        residualBlock_size_list: Number of ``BasicBlock`` units in each of the
            four stages, e.g. ``[2, 2, 2, 2]`` or ``[3, 4, 6, 3]``. Only the
            first four entries are read.
        class_count: Size of the final dense layer, i.e. the number of classes
            (100 for CIFAR-100).
    """

    def __init__(self, residualBlock_size_list, class_count=100):
        super(ResidualNet, self).__init__()
        # ---------------- stem ----------------
        # The convolution is given no padding argument, so Keras' default
        # ('valid') applies; the pooling uses stride 1 with 'same' padding.
        self.preprocessBlock = Sequential([layers.Conv2D(filters=50, kernel_size=[3, 3], strides=(1, 1)),
                                            layers.BatchNormalization(),
                                            layers.Activation('relu'),
                                            layers.MaxPool2D(pool_size=(2, 2), strides=(1, 1), padding='same')
                                            ])
        # ---------------- residual stages ----------------
        residualBlock01_size = residualBlock_size_list[0]
        residualBlock02_size = residualBlock_size_list[1]
        residualBlock03_size = residualBlock_size_list[2]
        residualBlock04_size = residualBlock_size_list[3]
        self.residualBlock1 = ResidualBlock(50, residualBlock01_size, stride=1)()  # stage 1: 50 channels, no sub-sampling
        self.residualBlock2 = ResidualBlock(150, residualBlock02_size, stride=2)()  # stage 2: 150 channels, stride 2
        self.residualBlock3 = ResidualBlock(300, residualBlock03_size, stride=2)()  # stage 3: 300 channels, stride 2
        self.residualBlock4 = ResidualBlock(500, residualBlock04_size, stride=2)()  # stage 4: 500 channels, stride 2
        # ---------------- head ----------------
        # Global average pooling reduces [b, h, w, 500] to [b, 500] whatever
        # the spatial size is, so the dense layer does not depend on h and w.
        self.avgpool_Layer = layers.GlobalAveragePooling2D()
        # Produces raw logits of shape [b, class_count]; no softmax is applied here.
        self.fullcon_Layer = layers.Dense(class_count)

    def call(self, inputs, training=None):
        """Forward pass.

        Args:
            inputs: Image batch laid out as [batch, height, width, channels].
            training: Forwarded to the stem and to every residual stage so that
                their BatchNormalization layers run in the requested mode.

        Returns:
            Logits of shape [batch, class_count].
        """
        # ---------------- stem ----------------
        x = self.preprocessBlock(inputs, training=training)  # [b, h, w, 50]
        # ---------------- residual stages ----------------
        x = self.residualBlock1(x, training=training)  # [b, h, w, 50]
        x = self.residualBlock2(x, training=training)  # [b, h, w, 150]
        x = self.residualBlock3(x, training=training)  # [b, h, w, 300]
        x = self.residualBlock4(x, training=training)  # [b, h, w, 500]
        # ---------------- head ----------------
        x = self.avgpool_Layer(x)  # [b, 500]
        x = self.fullcon_Layer(x)  # [b, class_count]

        return x

def resnet18():
    """Return a ``ResidualNet`` with two BasicBlock units in each of its four stages."""
    units_per_stage = [2, 2, 2, 2]
    return ResidualNet(units_per_stage)

def resnet34():
    """Return a ``ResidualNet`` whose four stages hold 3, 4, 6 and 3 BasicBlock units."""
    units_per_stage = [3, 4, 6, 3]
    return ResidualNet(units_per_stage)
    
#==========================================自定义ResNet神经网络：结束==========================================

# 1. Load the CIFAR-100 dataset and drop the size-1 axes from the label arrays.
(X_train, Y_train), (X_val, Y_val) = datasets.cifar100.load_data()
_shape_report = 'X_train.shpae = {0}，Y_train.shpae = {1}------------type(X_train) = {2}，type(Y_train) = {3}'
# Report shapes/types as loaded ...
print(_shape_report.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))
Y_train, Y_val = tf.squeeze(Y_train), tf.squeeze(Y_val)
# ... and again after squeezing (the labels are tensors from here on).
print(_shape_report.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))


# 二、数据处理
# 预处理函数：将numpy数据转为tensor
def preprocess(x, y):
    """Convert one (image, label) pair to the dtypes used by the model.

    The image is cast to float32 and divided by 255; the label is cast to int32.
    """
    scaled_image = tf.cast(x, dtype=tf.float32) / 255.
    int_label = tf.cast(y, dtype=tf.int32)
    return scaled_image, int_label


# 2.1 Training set: slice the arrays into per-sample elements, convert dtypes
# with preprocess(), then shuffle through a 1000-element buffer so the stored
# sample order does not influence training.
db_train = (
    tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    .map(preprocess)
    .shuffle(buffer_size=1000)
)
print('db_train = {0}，type(db_train) = {1}'.format(db_train, type(db_train)))
# Samples per training batch (the original note suggested 100-200; 500 is used here).
batch_size_train = 500
db_batch_train = db_train.batch(batch_size_train)
print('db_batch_train = {0}，type(db_batch_train) = {1}'.format(db_batch_train, type(db_batch_train)))
# 2.2 Validation set: same conversion, but no shuffling is needed.
db_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val)).map(preprocess)
# Samples per validation batch.
batch_size_val = 500
db_batch_val = db_val.batch(batch_size_val)

# 3. Build the ResNet model.
# Create the weights for 32x32 RGB inputs (None = unspecified batch size),
# then print a short per-layer summary.
resnet18_network = resnet18()
resnet18_network.build(input_shape=(None, 32, 32, 3))
resnet18_network.summary()

# 4. Optimizer used for gradient descent.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
optimizer = optimizers.Adam(learning_rate=1e-3)


# 五、整体数据集进行一次梯度下降来更新模型参数，整体数据集迭代一次，一般用epoch。每个epoch中含有batch_step_no个step，每个step中就是设置的每个batch所含有的样本数量。
def train_epoch(epoch_no):
    """Run one pass over ``db_batch_train``, applying one optimizer step per batch.

    Args:
        epoch_no: 1-based epoch number, used only in the log output.
    """
    print('++++++++++++++++++++++++++++++++++++++++++++第{0}轮Epoch-->Training 阶段：开始++++++++++++++++++++++++++++++++++++++++++++'.format(epoch_no))
    for batch_step_no, (X_batch, Y_batch) in enumerate(db_batch_train):
        print('epoch_no = {0}, batch_step_no = {1}，X_batch.shpae = {2}，Y_batch.shpae = {3}------------type(X_batch) = {4}，type(Y_batch) = {5}'.format(epoch_no, batch_step_no + 1, X_batch.shape, Y_batch.shape, type(X_batch), type(Y_batch)))
        Y_batch_one_hot = tf.one_hot(Y_batch, depth=100)  # [b] => [b, 100]
        print('\tY_train_one_hot.shpae = {0}'.format(Y_batch_one_hot.shape))
        # Record the forward pass and the loss so gradients can be taken
        # with respect to the model's trainable variables.
        with tf.GradientTape() as tape:
            # training=True is required here: without it the BatchNormalization
            # layers run in inference mode, i.e. they normalize with their
            # moving averages and never update them.
            out_logits = resnet18_network(X_batch, training=True)  # [b, 32, 32, 3] => [b, 100]
            print('\tout_logits.shape = {0}'.format(out_logits.shape))
            # Cross-entropy on raw logits (true labels first, predictions second).
            # The log labels below still read "MSE_Loss"; they are kept unchanged.
            per_sample_loss = tf.losses.categorical_crossentropy(Y_batch_one_hot, out_logits, from_logits=True)
            print('\tMSE_Loss.shape = {0}'.format(per_sample_loss.shape))
            batch_loss = tf.reduce_mean(per_sample_loss)
            print('\t求均值后：MSE_Loss.shape = {0}'.format(batch_loss.shape))
            print('\t第{0}个epoch-->第{1}个batch step的初始时的：MSE_Loss = {2}'.format(epoch_no, batch_step_no + 1, batch_loss))
        # Gradients of the batch loss with respect to every trainable variable.
        grads = tape.gradient(batch_loss, resnet18_network.trainable_variables)
        # Log the gradient shapes once per epoch, for the first batch only.
        if batch_step_no == 0:
            for index_variable, grad in enumerate(grads, start=1):
                print('\t\tgrad{0}：grad.shape = {1}，grad.ndim = {2}'.format(index_variable, grad.shape, grad.ndim))
        # One optimizer step; zip pairs each gradient with its variable.
        print('\t梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：开始')
        optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))
        print('\t梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：结束\n')
    print('++++++++++++++++++++++++++++++++++++++++++++第{0}轮Epoch-->Training 阶段：结束++++++++++++++++++++++++++++++++++++++++++++'.format(epoch_no))


# 六、模型评估 test/evluation
def evluation(epoch_no):
    """Measure top-1 accuracy of ``resnet18_network`` on ``db_batch_val``.

    Args:
        epoch_no: 1-based epoch number, used only in the log output.

    Returns:
        The fraction of correctly classified validation samples, or ``0.0``
        when the validation set yields no samples.
    """
    print('++++++++++++++++++++++++++++++++++++++++++++第{0}轮Epoch-->Evluation 阶段：开始++++++++++++++++++++++++++++++++++++++++++++'.format(epoch_no))
    total_correct, total_num = 0, 0
    for batch_step_no, (X_batch, Y_batch) in enumerate(db_batch_val):
        print('epoch_no = {0}, batch_step_no = {1}，X_batch.shpae = {2}，Y_batch.shpae = {3}'.format(epoch_no, batch_step_no + 1, X_batch.shape, Y_batch.shape))
        # Inference mode is requested explicitly so BatchNormalization uses its
        # moving averages regardless of any ambient learning-phase setting.
        out_logits = resnet18_network(X_batch, training=False)  # [b, 32, 32, 3] => [b, 100]
        print('\tout_logits.shape = {0}'.format(out_logits.shape))
        # Softmax is monotonic, so the arg-max of the logits is already the
        # predicted class; int32 matches the label dtype set by preprocess().
        predicted_class = tf.argmax(out_logits, axis=1, output_type=tf.int32)  # [b, 100] => [b]
        is_correct_boolean = tf.equal(predicted_class, Y_batch)
        is_correct_count = tf.reduce_sum(tf.cast(is_correct_boolean, dtype=tf.float32))
        print('\tis_correct_count = {0}\n'.format(is_correct_count))
        total_correct += int(is_correct_count)
        total_num += X_batch.shape[0]
    print('total_correct = {0}---total_num = {1}'.format(total_correct, total_num))
    # Guard against an empty validation set instead of dividing by zero.
    acc = total_correct / total_num if total_num else 0.0
    print('第{0}轮Epoch迭代的准确度： acc = {1}'.format(epoch_no, acc))
    print('++++++++++++++++++++++++++++++++++++++++++++第{0}轮Epoch-->Evluation 阶段：结束++++++++++++++++++++++++++++++++++++++++++++'.format(epoch_no))
    return acc


# 七、整体数据迭代多次梯度下降来更新模型参数
def train(epoch_count=1):
    """Train and evaluate the model for ``epoch_count`` epochs.

    Each epoch runs ``train_epoch`` followed by ``evluation``.

    Args:
        epoch_count: Number of full passes over the training set. Defaults to
            1, the value that was previously hard-coded.
    """
    for epoch_no in range(1, epoch_count + 1):
        print('\n\n利用整体数据集进行模型的第{0}轮Epoch迭代开始:**********************************************************************************************************************************'.format(epoch_no))
        train_epoch(epoch_no)
        evluation(epoch_no)
        print('利用整体数据集进行模型的第{0}轮Epoch迭代结束:**********************************************************************************************************************************'.format(epoch_no))


# Start training only when the file is executed as a script.
if __name__ == '__main__':
    train()

打印结果：

X_train.shpae = (50000, 32, 32, 3)，Y_train.shpae = (50000, 1)------------type(X_train) = <class 'numpy.ndarray'>，type(Y_train) = <class 'numpy.ndarray'>
X_train.shpae = (50000, 32, 32, 3)，Y_train.shpae = (50000,)------------type(X_train) = <class 'numpy.ndarray'>，type(Y_train) = <class 'tensorflow.python.framework.ops.EagerTensor'>
db_train = <ShuffleDataset shapes: ((32, 32, 3), ()), types: (tf.float32, tf.int32)>，type(db_train) = <class 'tensorflow.python.data.ops.dataset_ops.ShuffleDataset'>
db_batch_train = <BatchDataset shapes: ((None, 32, 32, 3), (None,)), types: (tf.float32, tf.int32)>，type(db_batch_train) = <class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>
Model: "residual_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
sequential (Sequential)      (None, 30, 30, 50)        1600      
_________________________________________________________________
sequential_1 (Sequential)    (None, 30, 30, 50)        91000     
_________________________________________________________________
sequential_2 (Sequential)    (None, 15, 15, 150)       685650    
_________________________________________________________________
sequential_3 (Sequential)    (None, 8, 8, 300)         2886300   
_________________________________________________________________
sequential_4 (Sequential)    (None, 4, 4, 500)         8260500   
_________________________________________________________________
global_average_pooling2d (Gl multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  50100     
=================================================================
Total params: 11,975,150
Trainable params: 11,967,050
Non-trainable params: 8,100
_________________________________________________________________


利用整体数据集进行模型的第1轮Epoch迭代开始:**********************************************************************************************************************************
++++++++++++++++++++++++++++++++++++++++++++第1轮Epoch-->Training 阶段：开始++++++++++++++++++++++++++++++++++++++++++++
epoch_no = 1, batch_step_no = 1，X_batch.shpae = (500, 32, 32, 3)，Y_batch.shpae = (500,)------------type(X_batch) = <class 'tensorflow.python.framework.ops.EagerTensor'>，type(Y_batch) = <class 'tensorflow.python.framework.ops.EagerTensor'>
	Y_train_one_hot.shpae = (500, 100)
	out_logits.shape = (500, 100)
	MSE_Loss.shape = (500,)
	求均值后：MSE_Loss.shape = ()
	第1个epoch-->第1个batch step的初始时的：MSE_Loss = 4.608854293823242
		grad1：grad.shape = (3, 3, 3, 50)，grad.ndim = 4
		grad2：grad.shape = (50,)，grad.ndim = 1
		grad3：grad.shape = (50,)，grad.ndim = 1
		grad4：grad.shape = (50,)，grad.ndim = 1
		grad5：grad.shape = (3, 3, 50, 50)，grad.ndim = 4
		grad6：grad.shape = (50,)，grad.ndim = 1
		grad7：grad.shape = (50,)，grad.ndim = 1
		grad8：grad.shape = (50,)，grad.ndim = 1
		grad9：grad.shape = (3, 3, 50, 50)，grad.ndim = 4
		grad10：grad.shape = (50,)，grad.ndim = 1
		grad11：grad.shape = (50,)，grad.ndim = 1
		grad12：grad.shape = (50,)，grad.ndim = 1
		grad13：grad.shape = (3, 3, 50, 50)，grad.ndim = 4
		grad14：grad.shape = (50,)，grad.ndim = 1
		grad15：grad.shape = (50,)，grad.ndim = 1
		grad16：grad.shape = (50,)，grad.ndim = 1
		grad17：grad.shape = (3, 3, 50, 50)，grad.ndim = 4
		grad18：grad.shape = (50,)，grad.ndim = 1
		grad19：grad.shape = (50,)，grad.ndim = 1
		grad20：grad.shape = (50,)，grad.ndim = 1
		grad21：grad.shape = (3, 3, 50, 150)，grad.ndim = 4
		grad22：grad.shape = (150,)，grad.ndim = 1
		grad23：grad.shape = (150,)，grad.ndim = 1
		grad24：grad.shape = (150,)，grad.ndim = 1
		grad25：grad.shape = (3, 3, 150, 150)，grad.ndim = 4
		grad26：grad.shape = (150,)，grad.ndim = 1
		grad27：grad.shape = (150,)，grad.ndim = 1
		grad28：grad.shape = (150,)，grad.ndim = 1
		grad29：grad.shape = (1, 1, 50, 150)，grad.ndim = 4
		grad30：grad.shape = (150,)，grad.ndim = 1
		grad31：grad.shape = (3, 3, 150, 150)，grad.ndim = 4
		grad32：grad.shape = (150,)，grad.ndim = 1
		grad33：grad.shape = (150,)，grad.ndim = 1
		grad34：grad.shape = (150,)，grad.ndim = 1
		grad35：grad.shape = (3, 3, 150, 150)，grad.ndim = 4
		grad36：grad.shape = (150,)，grad.ndim = 1
		grad37：grad.shape = (150,)，grad.ndim = 1
		grad38：grad.shape = (150,)，grad.ndim = 1
		grad39：grad.shape = (3, 3, 150, 300)，grad.ndim = 4
		grad40：grad.shape = (300,)，grad.ndim = 1
		grad41：grad.shape = (300,)，grad.ndim = 1
		grad42：grad.shape = (300,)，grad.ndim = 1
		grad43：grad.shape = (3, 3, 300, 300)，grad.ndim = 4
		grad44：grad.shape = (300,)，grad.ndim = 1
		grad45：grad.shape = (300,)，grad.ndim = 1
		grad46：grad.shape = (300,)，grad.ndim = 1
		grad47：grad.shape = (1, 1, 150, 300)，grad.ndim = 4
		grad48：grad.shape = (300,)，grad.ndim = 1
		grad49：grad.shape = (3, 3, 300, 300)，grad.ndim = 4
		grad50：grad.shape = (300,)，grad.ndim = 1
		grad51：grad.shape = (300,)，grad.ndim = 1
		grad52：grad.shape = (300,)，grad.ndim = 1
		grad53：grad.shape = (3, 3, 300, 300)，grad.ndim = 4
		grad54：grad.shape = (300,)，grad.ndim = 1
		grad55：grad.shape = (300,)，grad.ndim = 1
		grad56：grad.shape = (300,)，grad.ndim = 1
		grad57：grad.shape = (3, 3, 300, 500)，grad.ndim = 4
		grad58：grad.shape = (500,)，grad.ndim = 1
		grad59：grad.shape = (500,)，grad.ndim = 1
		grad60：grad.shape = (500,)，grad.ndim = 1
		grad61：grad.shape = (3, 3, 500, 500)，grad.ndim = 4
		grad62：grad.shape = (500,)，grad.ndim = 1
		grad63：grad.shape = (500,)，grad.ndim = 1
		grad64：grad.shape = (500,)，grad.ndim = 1
		grad65：grad.shape = (1, 1, 300, 500)，grad.ndim = 4
		grad66：grad.shape = (500,)，grad.ndim = 1
		grad67：grad.shape = (3, 3, 500, 500)，grad.ndim = 4
		grad68：grad.shape = (500,)，grad.ndim = 1
		grad69：grad.shape = (500,)，grad.ndim = 1
		grad70：grad.shape = (500,)，grad.ndim = 1
		grad71：grad.shape = (3, 3, 500, 500)，grad.ndim = 4
		grad72：grad.shape = (500,)，grad.ndim = 1
		grad73：grad.shape = (500,)，grad.ndim = 1
		grad74：grad.shape = (500,)，grad.ndim = 1
		grad75：grad.shape = (500, 100)，grad.ndim = 2
		grad76：grad.shape = (100,)，grad.ndim = 1
	梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：开始
	梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：结束

epoch_no = 1, batch_step_no = 2，X_batch.shpae = (500, 32, 32, 3)，Y_batch.shpae = (500,)------------type(X_batch) = <class 'tensorflow.python.framework.ops.EagerTensor'>，type(Y_batch) = <class 'tensorflow.python.framework.ops.EagerTensor'>
	Y_train_one_hot.shpae = (500, 100)
	out_logits.shape = (500, 100)
	MSE_Loss.shape = (500,)
	求均值后：MSE_Loss.shape = ()
	第1个epoch-->第2个batch step的初始时的：MSE_Loss = 5.222436428070068
	梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：开始
	梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：结束
...
...
...
...
...
epoch_no = 1, batch_step_no = 100，X_batch.shpae = (500, 32, 32, 3)，Y_batch.shpae = (500,)------------type(X_batch) = <class 'tensorflow.python.framework.ops.EagerTensor'>，type(Y_batch) = <class 'tensorflow.python.framework.ops.EagerTensor'>
	Y_train_one_hot.shpae = (500, 100)
	out_logits.shape = (500, 100)
	MSE_Loss.shape = (500,)
	求均值后：MSE_Loss.shape = ()
	第1个epoch-->第100个batch step的初始时的：MSE_Loss = 4.207188129425049
	梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：开始
	梯度下降步骤-->optimizer.apply_gradients(zip(grads, resnet18_network.trainable_variables))：结束

++++++++++++++++++++++++++++++++++++++++++++第1轮Epoch-->Training 阶段：结束++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++第1轮Epoch-->Evluation 阶段：开始++++++++++++++++++++++++++++++++++++++++++++
epoch_no = 1, batch_step_no = 1，X_batch.shpae = (500, 32, 32, 3)，Y_batch.shpae = (500,)
	out_logits.shape = (500, 100)
	is_correct_count = 18.0

epoch_no = 1, batch_step_no = 2，X_batch.shpae = (500, 32, 32, 3)，Y_batch.shpae = (500,)
	out_logits.shape = (500, 100)
	is_correct_count = 27.0
...
...
...

epoch_no = 1, batch_step_no = 20，X_batch.shpae = (500, 32, 32, 3)，Y_batch.shpae = (500,)
	out_logits.shape = (500, 100)
	is_correct_count = 26.0

total_correct = 454---total_num = 10000
第1轮Epoch迭代的准确度： acc = 0.0454
++++++++++++++++++++++++++++++++++++++++++++第1轮Epoch-->Evluation 阶段：结束++++++++++++++++++++++++++++++++++++++++++++
利用整体数据集进行模型的第1轮Epoch迭代结束:**********************************************************************************************************************************

Process finished with exit code 0

人工智能-深度学习-TensorFlow2：TensorFlow2 创建CNN神经网络模型【ResNet模型】

自定义ResNet神经网络-Tensorflow【cifar100分类数据集】

猜你喜欢