TensorFlow进阶

合并(拼接concat、堆叠stack)、分割

数据统计

向量范数

求解张量在某个维度上的最大值/最小值/均值/和，也可以求全局最大值/最小值/均值/和

张量比较：将预测类别标签张量与真实类别标签张量逐元素比较，统计准确率指标

import  tensorflow as tf
import  os
# Lower TensorFlow's C++ log verbosity (level '2' is commonly documented as
# hiding INFO and WARNING messages -- confirm against the TF version in use).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Fix the global random seed so the random tensors created below are reproducible.
tf.random.set_seed(2467)

def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy, in percent, for every k in ``topk``.

    A sample counts as correct for a given k when its true class id appears
    among the k highest-scoring classes of its row in ``output``.

    Args:
        output: 2-D tensor of per-class scores, shape [batch, num_classes].
        target: 1-D integer tensor of true class ids, shape [batch]. Any
            integer dtype is accepted; it is cast to the dtype of the
            top-k indices before comparison.
        topk: Iterable of k values to evaluate. ``max(topk)`` must not
            exceed ``num_classes``.

    Returns:
        A list of Python floats, one per entry of ``topk`` and in the same
        order, each being ``100 * hits / batch``.
    """
    maxk = max(topk)
    batch_size = target.shape[0]

    # Indices of the maxk best-scoring classes per sample: [batch, maxk].
    pred = tf.math.top_k(output, maxk).indices
    # Transpose to [maxk, batch] so that row r holds every sample's rank-r guess.
    pred = tf.transpose(pred, perm=[1, 0])
    # tf.equal requires both operands to share a dtype, and top_k yields int32
    # indices; cast the labels so int64/uint8 targets work as well.
    target_ = tf.broadcast_to(tf.cast(target, pred.dtype), pred.shape)
    # correct[r, i] is True when sample i's rank-r guess equals its label.
    correct = tf.equal(pred, target_)

    res = []
    for k in topk:
        # Each sample can match in at most one row, so summing the first k
        # rows counts the samples whose label is within their top-k guesses.
        hits = tf.reduce_sum(tf.cast(correct[:k], dtype=tf.float32))
        res.append(float(hits * (100.0 / batch_size)))
    return res

# Draw random scores of shape (10, 6) from a standard normal distribution and
# turn each row into a probability distribution over 6 classes with softmax.
output = tf.math.softmax(tf.random.normal([10, 6]), axis=1)
# Draw 10 integer class ids uniformly from [0, 6) to act as the true labels.
target = tf.random.uniform([10], maxval=6, dtype=tf.int32)
print('prob:', output.numpy())
# The index of the largest probability in each row is the predicted class id.
pred = tf.argmax(output, axis=1)
# The original notes recorded [0 4 4 4 1 4 3 5 5 1] for pred and
# [0 2 3 4 2 4 2 3 5 5] for label from one run.
print('pred:', pred.numpy())
print('label:', target.numpy())

# Top-1 through top-6 accuracy in percent.
acc = accuracy(output, target, topk=tuple(range(1, 7)))
print('top-1-6 acc:', acc)

填充与复制：0填充padding

数据限幅：限制数据的范围

张量限幅、梯度裁剪(clip_by_value、clip_by_norm、clip_by_global_norm)

>>> import tensorflow as tf
>>> w1=tf.random.normal([3,3])
>>> w2=tf.random.normal([3,3])
>>> w1
<tf.Tensor: id=5, shape=(3, 3), dtype=float32, numpy=
array([[-0.3745235 , -0.54776704, -0.6978908 ],
       [-0.48667282, -1.9662677 ,  1.2693951 ],
       [-1.6218463 , -1.3147658 ,  1.1985897 ]], dtype=float32)>
#tf.norm(a)默认计算L2范数，即tf.norm(a, ord=2)，等同于tf.sqrt(tf.reduce_sum(tf.square(a)))
#计算裁剪前的网络参数θ的总范数global_norm：所有裁剪前的网络参数θ的L2范数tf.norm(θ)的平方和，然后开方sqrt
>>> global_norm=tf.math.sqrt(tf.norm(w1)**2+tf.norm(w2)**2)
>>> global_norm #4.7265425
<tf.Tensor: id=27, shape=(), dtype=float32, numpy=4.7265425>
#通过tf.clip_by_global_norm([θ],MAX_norm)裁剪后，网络参数的梯度组的总范数缩减到MAX_norm=2
#clip_by_global_norm返回2个对象：裁剪后的张量列表List[θ]，以及裁剪前的全局范数global_norm
>>> (ww1,ww2),global_norm=tf.clip_by_global_norm([w1,w2],2) #梯度裁剪一般在计算出梯度后、梯度更新之前进行
>>> ww1
<tf.Tensor: id=47, shape=(3, 3), dtype=float32, numpy=
array([[-0.15847673, -0.2317834 , -0.29530713],
       [-0.20593184, -0.832011  ,  0.53713477],
       [-0.6862717 , -0.556333  ,  0.50717396]], dtype=float32)>
>>> ww2
<tf.Tensor: id=48, shape=(3, 3), dtype=float32, numpy=
array([[ 0.03117203, -0.7264457 ,  0.32293826],
       [ 0.5894358 ,  0.87403387,  0.04680141],
       [ 0.0015509 ,  0.15240058,  0.05759645]], dtype=float32)>
>>> global_norm
<tf.Tensor: id=35, shape=(), dtype=float32, numpy=4.7265425>
#计算裁剪后的网络参数θ的总范数global_norm：所有裁剪后的网络参数θ的L2范数tf.norm(θ)的平方和，然后开方sqrt
>>> global_norm2 = tf.math.sqrt(tf.norm(ww1)**2+tf.norm(ww2)**2)
>>> global_norm2
<tf.Tensor: id=64, shape=(), dtype=float32, numpy=1.9999998>
>>> print(global_norm, global_norm2)
tf.Tensor(4.7265425, shape=(), dtype=float32) tf.Tensor(1.9999998, shape=(), dtype=float32)

import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import datasets, layers, optimizers
import  os

# Lower TensorFlow's C++ log verbosity, then report the TensorFlow version.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
print(tf.__version__)

# Only the MNIST training split is used; the test split is discarded.
(x, y), _ = datasets.mnist.load_data()
# Scale pixel values by 1/50.
# NOTE(review): this is not a [0, 1] normalisation (that would be / 255.);
# presumably the larger inputs are deliberate, to provoke big gradients for
# the clipping demo below -- confirm before changing.
x = tf.convert_to_tensor(x, dtype=tf.float32) / 50.
# One-hot encode the integer labels into 10 classes.
y = tf.one_hot(tf.convert_to_tensor(y), depth=10)
print('x:', x.shape, 'y:', y.shape)
# Batches of 128 samples; the whole dataset is repeated 30 times (30 epochs).
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.batch(128)
train_db = train_db.repeat(30)
# Pull a single batch to inspect its shapes. This rebinds x and y to that batch.
x, y = next(iter(train_db))
print('sample:', x.shape, y.shape)
# print(x[0], y[0])

def main():
    """Train a 784-512-256-10 MLP on ``train_db`` with clipped SGD updates.

    Iterates over the module-level ``train_db`` dataset, computes a
    mean-squared-error loss between the linear network output and the one-hot
    labels, clips the gradients by global norm, and applies them with SGD.
    The loss is printed every 100 steps. Returns nothing.
    """
    # Each layer: weight of shape [inputs, outputs] and bias of shape [outputs].
    w1, b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)), tf.Variable(tf.zeros([512]))
    w2, b2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
    w3, b3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))
    # Single source of truth for the trainable parameters, so the gradient
    # list and the update list can never fall out of order.
    params = [w1, b1, w2, b2, w3, b3]
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimizer = optimizers.SGD(learning_rate=0.01)

    for step, (x, y) in enumerate(train_db):
        # Flatten every image in the batch into a 784-element row.
        x = tf.reshape(x, (-1, 784))
        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            h1 = tf.nn.relu(x @ w1 + b1)
            h2 = tf.nn.relu(h1 @ w2 + b2)
            # The output layer is linear (no activation).
            out = h2 @ w3 + b3

            # Mean squared error: squared difference, mean over the class
            # axis per sample, then mean over the batch.
            loss = tf.square(y - out)
            loss = tf.reduce_mean(loss, axis=1)
            loss = tf.reduce_mean(loss)

        # Gradients of the loss with respect to each parameter, in `params` order.
        grads = tape.gradient(loss, params)

        # Rescale all gradients jointly so their global L2 norm is at most 15.
        # Clipping happens after the gradients are computed and before the update.
        grads, _ = tf.clip_by_global_norm(grads, 15)

        # SGD step: w' = w - lr * grad.
        optimizer.apply_gradients(zip(grads, params))

        if step % 100 == 0:
            print(step, 'loss:', float(loss))


if __name__ == '__main__':
    main()

tf.gather：根据axis轴上的索引维度获取数据

tf.gather_nd：通过指定多维坐标（索引），一次采样多个点或切片

tf.boolean_mask：通过给定掩码(mask)的方式采样

tf.where：根据cond条件的真假从a或b中读取数据 / 获取cond张量中所有为True的元素的索引

scatter_nd：往全0张量中的指定索引上更新数值

tf.meshgrid：生成网格坐标

>>> x.shape
TensorShape([100])
>>> y.shape
TensorShape([100])
>>> x1,y1 = tf.meshgrid(x,y)
>>> x1.shape
TensorShape([100, 100])
>>> y1.shape
TensorShape([100, 100])

import tensorflow as tf
import matplotlib.pyplot as plt

def func(x):
    """Evaluate z = sin(x0) + sin(x1) over the last axis of ``x``.

    :param x: tensor whose last axis holds the two coordinates; entries
        0 and 1 of that axis are read (e.g. shape [b, 2]).
    :return: tensor with the last axis removed, holding sin(x0) + sin(x1).
    """
    first_coord = x[..., 0]
    second_coord = x[..., 1]
    return tf.math.sin(first_coord) + tf.math.sin(second_coord)

# Sample 500 points along the x axis over [0, 2*3.14].
x = tf.linspace(0., 2*3.14, 500) # shape [500]
# Sample 500 points along the y axis over [0, 2*3.14].
y = tf.linspace(0., 2*3.14, 500) # shape [500]
# Build the grid: point_x holds the x coordinate of every grid point and
# point_y the y coordinate; both have shape [500, 500].
point_x, point_y = tf.meshgrid(x, y)
# Stack the two coordinate grids on a new last axis -> [500, 500, 2].
points = tf.stack([point_x, point_y], axis=2)
# points = tf.reshape(points, [-1, 2])
print('points:', points.shape) # points: (500, 500, 2)
# Evaluate the function at every grid point.
z = func(points)
print('z:', z.shape) # (500, 500)

# Figure 1: the function values as an image, with a colour bar.
plt.figure('plot 2d func value')
plt.imshow(z, origin='lower', interpolation='none')
plt.colorbar()
# Figure 2: contour lines of the same function, with a colour bar.
plt.figure('plot 2d func contour')
plt.contour(point_x, point_y, z)
plt.colorbar()
plt.show()

常用数据集功能：预处理(Preprocess)、随机打散(Shuffle)、批训练(Train on batch)等

import  matplotlib
from    matplotlib import pyplot as plt
# Default matplotlib parameters for every figure created in this script.
matplotlib.rcParams['font.size'] = 20
matplotlib.rcParams['figure.titlesize'] = 20
matplotlib.rcParams['figure.figsize'] = [9, 7]
# NOTE(review): presumably chosen so the Chinese legend/axis labels used by
# the plots below render; the 'STKaiTi' font must be installed -- confirm.
matplotlib.rcParams['font.family'] = ['STKaiTi']
# Disable the Unicode minus glyph for negative tick labels.
matplotlib.rcParams['axes.unicode_minus']=False 
import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import datasets, layers, optimizers
import  os
 
# Lower TensorFlow's C++ log verbosity, then report the TensorFlow version in use.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
print(tf.__version__)
 
def preprocess(x, y):
    """Prepare one batch of images and labels for the MLP.

    Prints the incoming shapes, scales the images to float32 divided by 255,
    flattens each image to 28*28 values, and one-hot encodes the labels into
    10 classes.

    :param x: batch of images (the caller passes batches shaped [b, 28, 28]).
    :param y: batch of integer class labels (shape [b]).
    :return: tuple (images of shape [b, 784], one-hot labels of shape [b, 10]).
    """
    print(x.shape, y.shape)
    scaled = tf.cast(x, dtype=tf.float32) / 255.
    flat_images = tf.reshape(scaled, [-1, 28*28])
    int_labels = tf.cast(y, dtype=tf.int32)
    one_hot_labels = tf.one_hot(int_labels, depth=10)
    return flat_images, one_hot_labels
 
# Load the MNIST training and test splits.
(x, y), (x_test, y_test) = datasets.mnist.load_data()
# Report the shape of every array. The last field used to print the whole
# y_test array instead of its shape.
print('x:', x.shape, 'y:', y.shape, 'x test:', x_test.shape, 'y test:', y_test.shape)

batchsz = 512
# Training pipeline: pair samples with labels, shuffle with a 1000-element
# buffer, group into batches, preprocess each batch, and repeat for 20 epochs.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000)
train_db = train_db.batch(batchsz)
train_db = train_db.map(preprocess)
train_db = train_db.repeat(20)

# Test pipeline: same steps without repeat, so one pass covers the split once.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.shuffle(1000).batch(batchsz).map(preprocess)
# Pull a single training batch to inspect its shapes. This rebinds x and y.
x,y = next(iter(train_db))
print('train sample:', x.shape, y.shape)
 
def _forward(x, params):
    """Run the 3-layer MLP forward pass and return the linear output layer."""
    w1, b1, w2, b2, w3, b3 = params
    h1 = tf.nn.relu(x @ w1 + b1)
    h2 = tf.nn.relu(h1 @ w2 + b2)
    # The output layer is linear (no activation).
    return h2 @ w3 + b3


def main():
    """Train a 784-256-128-10 MLP with manual SGD and plot its progress.

    Iterates over the module-level ``train_db``, minimising the mean squared
    error between the network output and the one-hot labels. Every 80 steps
    it prints and records the training loss, evaluates accuracy over the
    whole module-level ``test_db``, and records that too. After training it
    saves the loss curve to 'train.svg' and the accuracy curve to 'test.svg'.
    Returns nothing.
    """
    lr = 1e-2  # learning rate
    accs, losses = [], []

    # Each layer: weight of shape [inputs, outputs] and bias of shape [outputs].
    w1, b1 = tf.Variable(tf.random.normal([784, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
    w2, b2 = tf.Variable(tf.random.normal([256, 128], stddev=0.1)), tf.Variable(tf.zeros([128]))
    w3, b3 = tf.Variable(tf.random.normal([128, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))
    params = [w1, b1, w2, b2, w3, b3]

    for step, (x, y) in enumerate(train_db):
        # Ensure every sample is a flat 784-element row.
        x = tf.reshape(x, (-1, 784))
        # Record the forward pass so gradients can be taken afterwards.
        with tf.GradientTape() as tape:
            out = _forward(x, params)
            # Mean squared error over every element of the batch.
            loss = tf.reduce_mean(tf.square(y - out))
        grads = tape.gradient(loss, params)
        # Manual SGD step, updating each variable in place: p = p - lr * grad.
        for p, g in zip(params, grads):
            p.assign_sub(lr * g)

        if step % 80 == 0:
            print(step, 'loss:', float(loss))
            losses.append(float(loss))

            # Evaluate on the whole test set.
            total, total_correct = 0., 0
            for x_eval, y_eval in test_db:
                out = _forward(x_eval, params)
                # Predicted class: index of the largest output per sample.
                pred = tf.argmax(out, axis=1)
                # The labels are one-hot, so argmax recovers the class id.
                labels = tf.argmax(y_eval, axis=1)
                correct = tf.equal(pred, labels)
                # bool -> int, then count the hits in this batch.
                total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
                total += x_eval.shape[0]

            print(step, 'Evaluate Acc:', total_correct/total)
            accs.append(total_correct/total)

    # One recorded point every 80 training steps.
    steps = [i*80 for i in range(len(losses))]

    plt.figure()
    plt.plot(steps, losses, color='C0', marker='s', label='训练')
    plt.ylabel('MSE')
    plt.xlabel('Step')
    plt.legend()
    plt.savefig('train.svg')

    plt.figure()
    plt.plot(steps, accs, color='C1', marker='s', label='测试')
    plt.ylabel('准确率')
    plt.xlabel('Step')
    plt.legend()
    plt.savefig('test.svg')


if __name__ == '__main__':
    main()

import  tensorflow as tf
from    tensorflow import keras
from    tensorflow.keras import datasets
import  os

# Two mock grade books with matching trailing dimensions [35, 8].
a = tf.random.normal([4,35,8])
b = tf.random.normal([6,35,8])
# Concatenate along axis 0 -> shape [10, 35, 8]. The result is not stored.
tf.concat([a, b], axis=0)

# One dense layer applied to 2 random samples of 784 features each.
x = tf.random.normal([2,784])
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
# Linear transform followed by ReLU -> shape [2, 256].
o1 = tf.nn.relu(tf.matmul(x, w1) + b1)