Artificial Intelligence - Deep Learning - Natural Language Processing (NLP) - TensorFlow2: Building RNN, LSTM, and GRU Models with TensorFlow2 on the IMDB Dataset [Binary Sentiment Classification of Movie Reviews]

I. RNN Examples

1. TensorFlow2 SimpleRNNCell example (building each cell and its memory/state manually) - IMDB dataset [binary sentiment classification of movie reviews]

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import time

tf.random.set_seed(22)
np.random.seed(22)

assert tf.__version__.startswith('2.')

batch_size = 500  # number of reviews per training batch

total_words = 10000  # keep only the 10,000 most frequent words
max_review_len = 80  # unified review length: shorter reviews are padded to 80 words, longer ones are truncated
embedding_len = 100  # dimensionality of the vector each word is mapped to

# 1. Load the dataset
(X_train, Y_train), (X_val, Y_val) = keras.datasets.imdb.load_data(num_words=total_words)
print('X_train[0] = {0},\nY_train[0] = {1}'.format(X_train[0], Y_train[0]))
print('X_train.shape = {0},Y_train.shape = {1}------------type(X_train) = {2},type(Y_train) = {3}'.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))

# 2. Preprocess the data
# 2.1 Pad/truncate every review to the same length
X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_review_len)  # unified review length    [b, 80]
X_val = keras.preprocessing.sequence.pad_sequences(X_val, maxlen=max_review_len)  # unified review length
print('X_train.shape = {0},Y_train.shape = {1},tf.reduce_max(Y_train) = {2},tf.reduce_min(Y_train) = {3}'.format(X_train.shape, Y_train.shape, tf.reduce_max(Y_train), tf.reduce_min(Y_train)))
# 2.2 Wrap the training and validation sets into batches
db_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
db_train = db_train.shuffle(1000).batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
db_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
db_val = db_val.batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
print('db_train = {0},len(db_train) = {1}'.format(db_train, len(db_train)))


class MyRNN(keras.Model):
    def __init__(self, output_dim):
        super(MyRNN, self).__init__()
        # ***********************************************************memoryCell***********************************************************
        # [b, 64]
        # Holds the previous hidden-state output h_{t-1}, one of the inputs when computing the current output h_t
        # Chaining two cells (cell01 -> cell02) is what makes the network deep
        self.memoryCell01 = [tf.zeros([batch_size, output_dim])]  # initial state of memoryCell01, shape [b, 64]
        self.memoryCell02 = [tf.zeros([batch_size, output_dim])]  # initial state of memoryCell02, shape [b, 64]
        # ***********************************************************Embedding***********************************************************
        # Maps each review (shape [80]: 80 word indices) to word embeddings (shape [80, 100]: 80 words, each a 100-dim vector)
        # [b, 80] => [b, 80, 100]
        # input_dim: vocabulary size (total number of words kept), 10000
        # input_length: unified review length (number of words per review), 80
        # output_dim: dimensionality of each word vector, 100
        self.embedding = layers.Embedding(input_dim=total_words, input_length=max_review_len, output_dim=embedding_len)
        # ***********************************************************SimpleRNNCell layer***********************************************************
        # [b, 80, 100] => [b, 64]
        self.rnn_cell01 = layers.SimpleRNNCell(output_dim, dropout=0.2)  # output_dim: dimensionality of the hidden state; dropout guards against overfitting
        self.rnn_cell02 = layers.SimpleRNNCell(output_dim, dropout=0.2)
        # ***********************************************************Fully connected layer***********************************************************
        # [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        net(x) or net(x, training=True): train mode
        net(x, training=False): test mode
        :param inputs: [b, 80]
        :param training:
        :return:
        """
        # ***********************************************************Embedding***********************************************************
        # embedding: [b, 80] => [b, 80, 100]
        wordEmbeddings = self.embedding(inputs)  # inputs is one batch of reviews
        print('\nwordEmbeddings.shape = {0}, wordEmbeddings = {1}'.format(wordEmbeddings.shape, wordEmbeddings))
        # rnn cell compute
        # ***********************************************************SimpleRNNCell layer***********************************************************
        # [b, 80, 100] => [b, 64]: each review is reduced from [80, 100] to [64]
        memoryCell01 = self.memoryCell01
        memoryCell02 = self.memoryCell02
        wordEmbedding_index = 0
        for wordEmbedding in tf.unstack(wordEmbeddings, axis=1):  # wordEmbedding: [b, 100]; unroll the 80 words of each review along the time axis
            # hidden state out01/out02: [b, 64]
            # h_t = tanh(x_t·W_xh + h_{t-1}·W_hh + b), where x_t = wordEmbedding, h_{t-1} = memoryCell01 and the output h_t = out01 (see the single-step sketch after this listing)
            out01, memoryCell01_current = self.rnn_cell01(wordEmbedding, memoryCell01, training=training)  # training=True means training mode, so dropout is active
            memoryCell01 = memoryCell01_current  # h_t replaces the old h_{t-1} in memoryCell01 for the next word
            # Feed the output out01 of rnn_cell01 into rnn_cell02 to strengthen the feature extraction of the recurrent layer
            out02, memoryCell02_current = self.rnn_cell02(out01, memoryCell02, training=training)  # training=True means training mode, so dropout is active
            memoryCell02 = memoryCell02_current  # h_t replaces the old h_{t-1} in memoryCell02 for the next word
            if wordEmbedding_index == 0:
                print('wordEmbedding.shape = {0}, wordEmbedding = {1}'.format(wordEmbedding.shape, wordEmbedding))
                print('out01.shape = {0}, out01 = {1}'.format(out01.shape, out01))
                print('out02.shape = {0}, out02 = {1}'.format(out02.shape, out02))
            wordEmbedding_index += 1
        # ***********************************************************Fully connected layer***********************************************************
        # out: [b, 64] => [b, 1]
        out_logit = self.outlayer(out02)  # out02 carries the extracted semantics of each review
        print('out_logit.shape = {0}, out_logit = {1}'.format(out_logit.shape, out_logit))
        out_prob = tf.sigmoid(out_logit)  # p(y is pos|x)
        print('out_prob.shape = {0}, out_prob = {1}, {2}'.format(out_prob.shape, out_prob, '\n'))
        return out_prob


def main():
    output_dim = 64     # dimensionality of the hidden state  [b, 100] => [b, 64]
    epochs = 4
    t0 = time.time()
    network = MyRNN(output_dim)
    # from_logits=True is not needed because MyRNN already applies the sigmoid: out_prob = tf.sigmoid(out_logit)
    # metrics=['accuracy'] reports accuracy during training and evaluation
    network.compile(optimizer=keras.optimizers.Adam(0.001),
                    loss=tf.losses.BinaryCrossentropy(),
                    metrics=['accuracy'])
    print('\n***********************************************************Training the network: start***********************************************************')
    network.fit(db_train, epochs=epochs, validation_data=db_val)
    print('***********************************************************Training the network: end***********************************************************')
    print('\n***********************************************************Evaluating the network (already evaluated during training): start***********************************************************')
    network.evaluate(db_val)  # evaluate the model
    print('***********************************************************Evaluating the network (already evaluated during training): end***********************************************************')
    t1 = time.time()
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()
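Each pass through the unstack loop above computes one step of the SimpleRNN recurrence h_t = tanh(x_t·W_xh + h_{t-1}·W_hh + b). Below is a minimal single-step sketch of my own (not part of the original script; the variable names are illustrative, and it assumes the cell's default tanh activation with dropout disabled):

import tensorflow as tf
from tensorflow.keras import layers

cell = layers.SimpleRNNCell(64)                 # hidden state size 64
x_t = tf.random.normal([4, 100])                # one time step for a batch of 4 word vectors
h_prev = [tf.zeros([4, 64])]                    # previous hidden state h_{t-1}
out, h_next = cell(x_t, h_prev)                 # out == h_next[0] == h_t

# The same step written out by hand with the cell's own weights:
manual = tf.tanh(x_t @ cell.kernel + h_prev[0] @ cell.recurrent_kernel + cell.bias)
print(tf.reduce_max(tf.abs(out - manual)))      # ~0, i.e. the two results match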

Output:

X_train[0] = [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32],
Y_train[0] = 1
X_train.shape = (25000,),Y_train.shape = (25000,)------------type(X_train) = <class 'numpy.ndarray'>,type(Y_train) = <class 'numpy.ndarray'>
X_train.shape = (25000, 80),Y_train.shape = (25000,),tf.reduce_max(Y_train) = 1,tf.reduce_min(Y_train) = 0
db_train = <BatchDataset shapes: ((500, 80), (500,)), types: (tf.int32, tf.int64)>,len(db_train) = 50

***********************************************************Training the network: start***********************************************************
Epoch 1/4

wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), out01 = Tensor("my_rnn/simple_rnn_cell/Tanh:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), out02 = Tensor("my_rnn/simple_rnn_cell_1/Tanh:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 


wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), out01 = Tensor("my_rnn/simple_rnn_cell/Tanh:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), out02 = Tensor("my_rnn/simple_rnn_cell_1/Tanh:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - ETA: 0s - loss: 0.6942 - accuracy: 0.5303
wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), out01 = Tensor("my_rnn/simple_rnn_cell/Tanh:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), out02 = Tensor("my_rnn/simple_rnn_cell_1/Tanh:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - 11s 125ms/step - loss: 0.6938 - accuracy: 0.5309 - val_loss: 0.5607 - val_accuracy: 0.7175
Epoch 2/4
50/50 [==============================] - 5s 98ms/step - loss: 0.4480 - accuracy: 0.7937 - val_loss: 0.4222 - val_accuracy: 0.8073
Epoch 3/4
50/50 [==============================] - 5s 99ms/step - loss: 0.2625 - accuracy: 0.8933 - val_loss: 0.4523 - val_accuracy: 0.8001
Epoch 4/4
50/50 [==============================] - 5s 98ms/step - loss: 0.1500 - accuracy: 0.9448 - val_loss: 0.5610 - val_accuracy: 0.8037
***********************************************************Training the network: end***********************************************************

***********************************************************Evaluating the network (already evaluated during training): start***********************************************************
50/50 [==============================] - 1s 23ms/step - loss: 0.5610 - accuracy: 0.8037
***********************************************************Evaluating the network (already evaluated during training): end***********************************************************
total time cost: 26.676692247390747

Process finished with exit code 0
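The reviews printed above are lists of word indices rather than text. To read one back as words, the IMDB word index can be inverted; a minimal sketch of my own (it assumes the default index_from=3 offset of keras.datasets.imdb.load_data, which reserves indices 0/1/2 for padding/start/unknown):

from tensorflow import keras

word_index = keras.datasets.imdb.get_word_index()                      # word -> index
index_word = {index + 3: word for word, index in word_index.items()}   # shift by the reserved indices
index_word.update({0: '<pad>', 1: '<start>', 2: '<unk>'})

(X_raw, _), _ = keras.datasets.imdb.load_data(num_words=10000)
print(' '.join(index_word.get(i, '<unk>') for i in X_raw[0]))          # decoded text of the first review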

2. TensorFlow2 SimpleRNN example - IMDB dataset [binary sentiment classification of movie reviews]

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import time

tf.random.set_seed(22)
np.random.seed(22)

assert tf.__version__.startswith('2.')

batch_size = 500  # number of reviews per training batch

total_words = 10000  # keep only the 10,000 most frequent words
max_review_len = 80  # unified review length: shorter reviews are padded to 80 words, longer ones are truncated
embedding_len = 100  # dimensionality of the vector each word is mapped to

# 1. Load the dataset
(X_train, Y_train), (X_val, Y_val) = keras.datasets.imdb.load_data(num_words=total_words)
print('X_train[0] = {0},\nY_train[0] = {1}'.format(X_train[0], Y_train[0]))
print('X_train.shape = {0},Y_train.shape = {1}------------type(X_train) = {2},type(Y_train) = {3}'.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))

# 2. Preprocess the data
# 2.1 Pad/truncate every review to the same length
X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_review_len)  # unified review length    [b, 80]
X_val = keras.preprocessing.sequence.pad_sequences(X_val, maxlen=max_review_len)  # unified review length
print('X_train.shape = {0},Y_train.shape = {1},tf.reduce_max(Y_train) = {2},tf.reduce_min(Y_train) = {3}'.format(X_train.shape, Y_train.shape, tf.reduce_max(Y_train), tf.reduce_min(Y_train)))
# 2.2 Wrap the training and validation sets into batches
db_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
db_train = db_train.shuffle(1000).batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
db_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
db_val = db_val.batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
print('db_train = {0},len(db_train) = {1}'.format(db_train, len(db_train)))


class MyRNN(keras.Model):
    def __init__(self, output_dim):
        super(MyRNN, self).__init__()
        # ***********************************************************Embedding***********************************************************
        # transform text to embedding representation
        # Maps each review (shape [80]: 80 word indices) to word embeddings (shape [80, 100]: 80 words, each a 100-dim vector)
        # [b, 80] => [b, 80, 100]
        # input_dim: vocabulary size (total number of words kept), 10000
        # input_length: unified review length (number of words per review), 80
        # output_dim: dimensionality of each word vector, 100
        self.embedding = layers.Embedding(input_dim=total_words, input_length=max_review_len, output_dim=embedding_len)
        # ***********************************************************RNN network: stacked SimpleRNN layers***********************************************************
        # [b, 80, 100] => [b, 64]
        self.rnn = keras.Sequential([
            # output_dim: dimensionality of the output space, i.e. of the hidden state; dropout guards against overfitting
            # return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
            # unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used.
            # Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences.
            layers.SimpleRNN(output_dim, dropout=0.5, return_sequences=True, unroll=True),
            layers.SimpleRNN(output_dim, dropout=0.5, unroll=True)
        ])
        # ***********************************************************Fully connected layer***********************************************************
        # [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        net(x) or net(x, training=True): train mode
        net(x, training=False): test mode
        :param inputs: [b, 80]
        :param training:
        :return:
        """
        # ***********************************************************Embedding***********************************************************
        # embedding: [b, 80] => [b, 80, 100]
        x_wordEmbeddings = self.embedding(inputs)  # inputs is one batch of reviews
        print('\nx_wordEmbeddings.shape = {0}, x_wordEmbeddings = {1}'.format(x_wordEmbeddings.shape, x_wordEmbeddings))
        # ***********************************************************RNN network computation***********************************************************
        out = self.rnn(x_wordEmbeddings)    # x: [b, 80, 100] => [b, 64]
        print('out.shape = {0}, out = {1}'.format(out.shape, out))
        out_logit = self.outlayer(out)  # hidden state => 0/1   out: [b, 64] => [b, 1]
        print('out_logit.shape = {0}, out_logit = {1}'.format(out_logit.shape, out_logit))
        out_prob = tf.sigmoid(out_logit)    # p(y is pos|x)
        print('out_prob.shape = {0}, out_prob = {1}, {2}'.format(out_prob.shape, out_prob, '\n'))
        return out_prob


def main():
    output_dim = 64     # dimensionality of the hidden state  [b, 100] => [b, 64]
    epochs = 4
    t0 = time.time()
    network = MyRNN(output_dim)
    # from_logits=True is not needed because MyRNN already applies the sigmoid: out_prob = tf.sigmoid(out_logit)
    # metrics=['accuracy'] reports accuracy during training and evaluation
    network.compile(optimizer=keras.optimizers.Adam(0.001),
                    loss=tf.losses.BinaryCrossentropy(),
                    metrics=['accuracy'])
    print('\n***********************************************************Training the network: start***********************************************************')
    network.fit(db_train, epochs=epochs, validation_data=db_val)
    print('***********************************************************Training the network: end***********************************************************')
    print('\n***********************************************************Evaluating the network (already evaluated during training): start***********************************************************')
    network.evaluate(db_val)  # evaluate the model
    print('***********************************************************Evaluating the network (already evaluated during training): end***********************************************************')
    t1 = time.time()
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()
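Because this variant no longer needs a hand-written time loop, the same network could also be expressed as a single keras.Sequential rather than a keras.Model subclass. A sketch of that alternative wiring (my own variation, reusing the constants defined above and folding the sigmoid into the Dense layer):

model = keras.Sequential([
    layers.Embedding(input_dim=total_words, output_dim=embedding_len, input_length=max_review_len),
    layers.SimpleRNN(64, dropout=0.5, return_sequences=True),   # [b, 80, 100] => [b, 80, 64]
    layers.SimpleRNN(64, dropout=0.5),                          # [b, 80, 64]  => [b, 64]
    layers.Dense(1, activation='sigmoid'),                      # [b, 64]      => [b, 1]
])
model.compile(optimizer=keras.optimizers.Adam(0.001),
              loss=tf.losses.BinaryCrossentropy(),
              metrics=['accuracy'])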

Output:

X_train[0] = [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32],
Y_train[0] = 1
X_train.shape = (25000,),Y_train.shape = (25000,)------------type(X_train) = <class 'numpy.ndarray'>,type(Y_train) = <class 'numpy.ndarray'>
X_train.shape = (25000, 80),Y_train.shape = (25000,),tf.reduce_max(Y_train) = 1,tf.reduce_min(Y_train) = 0
db_train = <BatchDataset shapes: ((500, 80), (500,)), types: (tf.int32, tf.int64)>,len(db_train) = 50

***********************************************************Training the network: start***********************************************************
Epoch 1/4

x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/simple_rnn_1/simple_rnn_cell_1/Tanh_79:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 


x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/simple_rnn_1/simple_rnn_cell_1/Tanh_79:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - ETA: 0s - loss: 0.7086 - accuracy: 0.5031
x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/simple_rnn_1/simple_rnn_cell_1/Tanh_79:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - 11s 129ms/step - loss: 0.7084 - accuracy: 0.5034 - val_loss: 0.6804 - val_accuracy: 0.5906
Epoch 2/4
50/50 [==============================] - 5s 94ms/step - loss: 0.6384 - accuracy: 0.6291 - val_loss: 0.4407 - val_accuracy: 0.7966
Epoch 3/4
50/50 [==============================] - 5s 95ms/step - loss: 0.4024 - accuracy: 0.8191 - val_loss: 0.4072 - val_accuracy: 0.8284
Epoch 4/4
50/50 [==============================] - 5s 94ms/step - loss: 0.2899 - accuracy: 0.8829 - val_loss: 0.4479 - val_accuracy: 0.8289
***********************************************************Training the network: end***********************************************************

***********************************************************Evaluating the network (already evaluated during training): start***********************************************************
50/50 [==============================] - 1s 24ms/step - loss: 0.4479 - accuracy: 0.8289
***********************************************************Evaluating the network (already evaluated during training): end***********************************************************
total time cost: 26.05630612373352

Process finished with exit code 0

II. LSTM Examples

1. TensorFlow2 LSTMCell example (building each cell and its memory/state manually) - IMDB dataset [binary sentiment classification of movie reviews]

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import time

tf.random.set_seed(22)
np.random.seed(22)

assert tf.__version__.startswith('2.')

batch_size = 500  # number of reviews per training batch

total_words = 10000  # keep only the 10,000 most frequent words
max_review_len = 80  # unified review length: shorter reviews are padded to 80 words, longer ones are truncated
embedding_len = 100  # dimensionality of the vector each word is mapped to

# 1. Load the dataset
(X_train, Y_train), (X_val, Y_val) = keras.datasets.imdb.load_data(num_words=total_words)
print('X_train[0] = {0},\nY_train[0] = {1}'.format(X_train[0], Y_train[0]))
print('X_train.shape = {0},Y_train.shape = {1}------------type(X_train) = {2},type(Y_train) = {3}'.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))

# 2. Preprocess the data
# 2.1 Pad/truncate every review to the same length
X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_review_len)  # unified review length    [b, 80]
X_val = keras.preprocessing.sequence.pad_sequences(X_val, maxlen=max_review_len)  # unified review length
print('X_train.shape = {0},Y_train.shape = {1},tf.reduce_max(Y_train) = {2},tf.reduce_min(Y_train) = {3}'.format(X_train.shape, Y_train.shape, tf.reduce_max(Y_train), tf.reduce_min(Y_train)))
# 2.2 Wrap the training and validation sets into batches
db_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
db_train = db_train.shuffle(1000).batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
db_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
db_val = db_val.batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
print('db_train = {0},len(db_train) = {1}'.format(db_train, len(db_train)))


class MyRNN(keras.Model):
    def __init__(self, output_dim):
        super(MyRNN, self).__init__()
        # ***********************************************************memoryCell***********************************************************
        # two tensors of shape [b, 64]
        # memoryCell holds the previous cell state C_{t-1} and the previous hidden output h_{t-1}, both inputs when computing the current C_t and h_t
        # Chaining two cells (cell01 -> cell02) is what makes the network deep
        self.memoryCell01 = [tf.zeros([batch_size, output_dim]), tf.zeros([batch_size, output_dim])]  # initial state of memoryCell01: two tensors of shape [b, 64]
        self.memoryCell02 = [tf.zeros([batch_size, output_dim]), tf.zeros([batch_size, output_dim])]  # initial state of memoryCell02: two tensors of shape [b, 64]
        # ***********************************************************Embedding***********************************************************
        # Maps each review (shape [80]: 80 word indices) to word embeddings (shape [80, 100]: 80 words, each a 100-dim vector)
        # [b, 80] => [b, 80, 100]
        # input_dim: vocabulary size (total number of words kept), 10000
        # input_length: unified review length (number of words per review), 80
        # output_dim: dimensionality of each word vector, 100
        self.embedding = layers.Embedding(input_dim=total_words, input_length=max_review_len, output_dim=embedding_len)
        # ***********************************************************LSTMCell layer***********************************************************
        # [b, 80, 100] => [b, 64]
        self.rnn_cell01 = layers.LSTMCell(output_dim, dropout=0.2)  # output_dim: dimensionality of the hidden state; dropout guards against overfitting
        self.rnn_cell02 = layers.LSTMCell(output_dim, dropout=0.2)
        # ***********************************************************Fully connected layer***********************************************************
        # [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        net(x) or net(x, training=True): train mode
        net(x, training=False): test mode
        :param inputs: [b, 80]
        :param training:
        :return:
        """
        # ***********************************************************Embedding***********************************************************
        # embedding: [b, 80] => [b, 80, 100]
        wordEmbeddings = self.embedding(inputs)  # inputs is one batch of reviews
        print('\nwordEmbeddings.shape = {0}, wordEmbeddings = {1}'.format(wordEmbeddings.shape, wordEmbeddings))
        # rnn cell compute
        # ***********************************************************LSTMCell layer***********************************************************
        # [b, 80, 100] => [b, 64]: each review is reduced from [80, 100] to [64]
        memoryCell01 = self.memoryCell01
        memoryCell02 = self.memoryCell02
        wordEmbedding_index = 0
        for wordEmbedding in tf.unstack(wordEmbeddings, axis=1):  # wordEmbedding: [b, 100]; unroll the 80 words of each review along the time axis
            # hidden state out01/out02: [b, 64]
            # The LSTM gates update the cell state C_t and the hidden output h_t = out01, with x_t = wordEmbedding (see the gate equations after this listing)
            # memoryCell01 holds two tensors: the hidden output h_t and the cell state C_t
            out01, memoryCell01 = self.rnn_cell01(wordEmbedding, memoryCell01, training=training)  # the returned state replaces memoryCell01;   training=True means training mode, so dropout is active
            # Feed the output out01 of rnn_cell01 into rnn_cell02 to strengthen the feature extraction of the recurrent layer
            # memoryCell02 likewise holds two tensors: the hidden output h_t and the cell state C_t
            out02, memoryCell02 = self.rnn_cell02(out01, memoryCell02, training=training)  # the returned state replaces memoryCell02; training=True means training mode, so dropout is active
            if wordEmbedding_index == 0:
                print('wordEmbedding.shape = {0}, wordEmbedding = {1}'.format(wordEmbedding.shape, wordEmbedding))
                print('out01.shape = {0}, memoryCell01[0].shape = {1}, out01 = {2}'.format(out01.shape, memoryCell01[0].shape, out01))
                print('out02.shape = {0}, memoryCell02[0].shape = {1}, out02 = {2}'.format(out02.shape, memoryCell02[0].shape, out02))
            wordEmbedding_index += 1
        # ***********************************************************Fully connected layer***********************************************************
        # out: [b, 64] => [b, 1]
        out_logit = self.outlayer(out02)  # out02 carries the extracted semantics of each review
        print('out_logit.shape = {0}, out_logit = {1}'.format(out_logit.shape, out_logit))
        out_prob = tf.sigmoid(out_logit)  # p(y is pos|x)
        print('out_prob.shape = {0}, out_prob = {1}, {2}'.format(out_prob.shape, out_prob, '\n'))
        return out_prob


def main():
    output_dim = 64     # dimensionality of the hidden state  [b, 100] => [b, 64]
    epochs = 4
    t0 = time.time()
    network = MyRNN(output_dim)
    # from_logits=True is not needed because MyRNN already applies the sigmoid: out_prob = tf.sigmoid(out_logit)
    # metrics=['accuracy'] reports accuracy during training and evaluation
    network.compile(optimizer=keras.optimizers.Adam(0.001),
                    loss=tf.losses.BinaryCrossentropy(),
                    metrics=['accuracy'])
    print('\n***********************************************************Training the network: start***********************************************************')
    network.fit(db_train, epochs=epochs, validation_data=db_val)
    print('***********************************************************Training the network: end***********************************************************')
    print('\n***********************************************************Evaluating the network (already evaluated during training): start***********************************************************')
    network.evaluate(db_val)  # evaluate the model
    print('***********************************************************Evaluating the network (already evaluated during training): end***********************************************************')
    t1 = time.time()
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()
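For reference, the two tensors carried in memoryCell01/memoryCell02 are updated at each step by the standard LSTM gates (the usual textbook formulation, added here as a reminder rather than quoted from the original post):

\begin{aligned}
i_t &= \sigma(W_i x_t + U_i h_{t-1} + b_i) \\
f_t &= \sigma(W_f x_t + U_f h_{t-1} + b_f) \\
o_t &= \sigma(W_o x_t + U_o h_{t-1} + b_o) \\
\tilde{C}_t &= \tanh(W_c x_t + U_c h_{t-1} + b_c) \\
C_t &= f_t \odot C_{t-1} + i_t \odot \tilde{C}_t \\
h_t &= o_t \odot \tanh(C_t)
\end{aligned}

out01/out02 correspond to h_t; the other tensor in the state list is the cell state C_t.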

Output:

X_train[0] = [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32],
Y_train[0] = 1
X_train.shape = (25000,),Y_train.shape = (25000,)------------type(X_train) = <class 'numpy.ndarray'>,type(Y_train) = <class 'numpy.ndarray'>
X_train.shape = (25000, 80),Y_train.shape = (25000,),tf.reduce_max(Y_train) = 1,tf.reduce_min(Y_train) = 0
db_train = <BatchDataset shapes: ((500, 80), (500,)), types: (tf.int32, tf.int64)>,len(db_train) = 50

***********************************************************Training the network: start***********************************************************
Epoch 1/4

wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), memoryCell01[0].shape = (500, 64), out01 = Tensor("my_rnn/lstm_cell/mul_3:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), memoryCell02[0].shape = (500, 64), out02 = Tensor("my_rnn/lstm_cell_1/mul_3:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 


wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), memoryCell01[0].shape = (500, 64), out01 = Tensor("my_rnn/lstm_cell/mul_3:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), memoryCell02[0].shape = (500, 64), out02 = Tensor("my_rnn/lstm_cell_1/mul_3:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - ETA: 0s - loss: 0.6419 - accuracy: 0.6000
wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), memoryCell01[0].shape = (500, 64), out01 = Tensor("my_rnn/lstm_cell/mul_3:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), memoryCell02[0].shape = (500, 64), out02 = Tensor("my_rnn/lstm_cell_1/mul_3:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - 20s 244ms/step - loss: 0.6402 - accuracy: 0.6018 - val_loss: 0.3965 - val_accuracy: 0.8207
Epoch 2/4
50/50 [==============================] - 9s 187ms/step - loss: 0.3500 - accuracy: 0.8469 - val_loss: 0.3740 - val_accuracy: 0.8362
Epoch 3/4
50/50 [==============================] - 9s 185ms/step - loss: 0.2635 - accuracy: 0.8947 - val_loss: 0.4112 - val_accuracy: 0.8321
Epoch 4/4
50/50 [==============================] - 10s 193ms/step - loss: 0.2153 - accuracy: 0.9161 - val_loss: 0.4534 - val_accuracy: 0.8260
***********************************************************Training the network: end***********************************************************

***********************************************************Evaluating the network (already evaluated during training): start***********************************************************
50/50 [==============================] - 2s 50ms/step - loss: 0.4534 - accuracy: 0.8260
***********************************************************Evaluating the network (already evaluated during training): end***********************************************************
total time cost: 50.509249687194824

Process finished with exit code 0
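The hand-written unstack loop above can also be delegated to layers.RNN, which rolls a cell (or a stack of cells) over the time axis for you. A minimal sketch of my own showing the equivalent stacked-cell setup (variable names are illustrative only):

import tensorflow as tf
from tensorflow.keras import layers

cells = [layers.LSTMCell(64, dropout=0.2), layers.LSTMCell(64, dropout=0.2)]
rnn = layers.RNN(layers.StackedRNNCells(cells))   # [b, 80, 100] => [b, 64], last time step only
out = rnn(tf.random.normal([500, 80, 100]))
print(out.shape)                                  # (500, 64)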

2. TensorFlow2 LSTM example - IMDB dataset [binary sentiment classification of movie reviews]

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import time

tf.random.set_seed(22)
np.random.seed(22)

assert tf.__version__.startswith('2.')

batch_size = 500  # number of reviews per training batch

total_words = 10000  # keep only the 10,000 most frequent words
max_review_len = 80  # unified review length: shorter reviews are padded to 80 words, longer ones are truncated
embedding_len = 100  # dimensionality of the vector each word is mapped to

# 1. Load the dataset
(X_train, Y_train), (X_val, Y_val) = keras.datasets.imdb.load_data(num_words=total_words)
print('X_train[0] = {0},\nY_train[0] = {1}'.format(X_train[0], Y_train[0]))
print('X_train.shape = {0},Y_train.shape = {1}------------type(X_train) = {2},type(Y_train) = {3}'.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))

# 2. Preprocess the data
# 2.1 Pad/truncate every review to the same length
X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_review_len)  # unified review length    [b, 80]
X_val = keras.preprocessing.sequence.pad_sequences(X_val, maxlen=max_review_len)  # unified review length
print('X_train.shape = {0},Y_train.shape = {1},tf.reduce_max(Y_train) = {2},tf.reduce_min(Y_train) = {3}'.format(X_train.shape, Y_train.shape, tf.reduce_max(Y_train), tf.reduce_min(Y_train)))
# 2.2 Wrap the training and validation sets into batches
db_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
db_train = db_train.shuffle(1000).batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
db_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
db_val = db_val.batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
print('db_train = {0},len(db_train) = {1}'.format(db_train, len(db_train)))


class MyRNN(keras.Model):
    def __init__(self, output_dim):
        super(MyRNN, self).__init__()
        # ***********************************************************Embedding***********************************************************
        # transform text to embedding representation
        # Maps each review (shape [80]: 80 word indices) to word embeddings (shape [80, 100]: 80 words, each a 100-dim vector)
        # [b, 80] => [b, 80, 100]
        # input_dim: vocabulary size (total number of words kept), 10000
        # input_length: unified review length (number of words per review), 80
        # output_dim: dimensionality of each word vector, 100
        self.embedding = layers.Embedding(input_dim=total_words, input_length=max_review_len, output_dim=embedding_len)
        # ***********************************************************RNN network: stacked LSTM layers***********************************************************
        # [b, 80, 100] => [b, 64]
        self.rnn = keras.Sequential([
            # output_dim: dimensionality of the output space, i.e. of the hidden state; dropout guards against overfitting
            # return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
            # unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used.
            # Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences.
            layers.LSTM(output_dim, dropout=0.5, return_sequences=True, unroll=True),
            layers.LSTM(output_dim, dropout=0.5, unroll=True)
        ])
        # ***********************************************************Fully connected layer***********************************************************
        # [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        net(x) or net(x, training=True): train mode
        net(x, training=False): test mode
        :param inputs: [b, 80]
        :param training:
        :return:
        """
        # ***********************************************************Embedding***********************************************************
        # embedding: [b, 80] => [b, 80, 100]
        x_wordEmbeddings = self.embedding(inputs)  # inputs is one batch of reviews
        print('\nx_wordEmbeddings.shape = {0}, x_wordEmbeddings = {1}'.format(x_wordEmbeddings.shape, x_wordEmbeddings))
        # ***********************************************************RNN network computation***********************************************************
        out = self.rnn(x_wordEmbeddings)  # x: [b, 80, 100] => [b, 64]
        print('out.shape = {0}, out = {1}'.format(out.shape, out))
        out_logit = self.outlayer(out)  # hidden state => 0/1   out: [b, 64] => [b, 1]
        print('out_logit.shape = {0}, out_logit = {1}'.format(out_logit.shape, out_logit))
        out_prob = tf.sigmoid(out_logit)  # p(y is pos|x)
        print('out_prob.shape = {0}, out_prob = {1}, {2}'.format(out_prob.shape, out_prob, '\n'))
        return out_prob


def main():
    output_dim = 64     # dimensionality of the hidden state  [b, 100] => [b, 64]
    epochs = 4
    t0 = time.time()
    network = MyRNN(output_dim)
    # from_logits=True is not needed because MyRNN already applies the sigmoid: out_prob = tf.sigmoid(out_logit)
    # metrics=['accuracy'] reports accuracy during training and evaluation
    network.compile(optimizer=keras.optimizers.Adam(0.001),
                    loss=tf.losses.BinaryCrossentropy(),
                    metrics=['accuracy'])
    print('\n***********************************************************Training the network: start***********************************************************')
    network.fit(db_train, epochs=epochs, validation_data=db_val)
    print('***********************************************************Training the network: end***********************************************************')
    print('\n***********************************************************Evaluating the network (already evaluated during training): start***********************************************************')
    network.evaluate(db_val)  # evaluate the model
    print('***********************************************************Evaluating the network (already evaluated during training): end***********************************************************')
    t1 = time.time()
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()

Output:

X_train[0] = [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32],
Y_train[0] = 1
X_train.shape = (25000,),Y_train.shape = (25000,)------------type(X_train) = <class 'numpy.ndarray'>,type(Y_train) = <class 'numpy.ndarray'>
X_train.shape = (25000, 80),Y_train.shape = (25000,),tf.reduce_max(Y_train) = 1,tf.reduce_min(Y_train) = 0
db_train = <BatchDataset shapes: ((500, 80), (500,)), types: (tf.int32, tf.int64)>,len(db_train) = 50
WARNING:tensorflow:Layer lstm will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU
WARNING:tensorflow:Layer lstm_1 will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU

***********************************************************Training the network: start***********************************************************
Epoch 1/4

x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/lstm_1/lstm_cell_1/mul_319:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 


x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/lstm_1/lstm_cell_1/mul_319:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - ETA: 0s - loss: 0.6580 - accuracy: 0.5754
x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/lstm_1/lstm_cell_1/mul_319:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - 21s 235ms/step - loss: 0.6566 - accuracy: 0.5772 - val_loss: 0.4175 - val_accuracy: 0.8122
Epoch 2/4
50/50 [==============================] - 10s 199ms/step - loss: 0.3870 - accuracy: 0.8297 - val_loss: 0.3594 - val_accuracy: 0.8421
Epoch 3/4
50/50 [==============================] - 10s 206ms/step - loss: 0.2956 - accuracy: 0.8808 - val_loss: 0.3779 - val_accuracy: 0.8359
Epoch 4/4
50/50 [==============================] - 10s 198ms/step - loss: 0.2560 - accuracy: 0.8976 - val_loss: 0.3988 - val_accuracy: 0.8343
***********************************************************Training the network: end***********************************************************

***********************************************************Evaluating the network (already evaluated during training): start***********************************************************
50/50 [==============================] - 2s 50ms/step - loss: 0.3988 - accuracy: 0.8343
***********************************************************Evaluating the network (already evaluated during training): end***********************************************************
total time cost: 53.280513763427734

Process finished with exit code 0
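About the two cuDNN warnings in the log above: layers.LSTM only uses the fused cuDNN kernel when its recurrent defaults are kept (tanh activation, sigmoid recurrent activation, recurrent_dropout=0, unroll=False); here unroll=True is what forces the generic kernel. A sketch of a cuDNN-eligible stack, as far as I understand the criteria (my own note, not from the original post):

rnn = keras.Sequential([
    layers.LSTM(64, dropout=0.5, return_sequences=True),  # recurrent defaults kept => eligible for the cuDNN kernel on GPU
    layers.LSTM(64, dropout=0.5)                           # input dropout is allowed; recurrent_dropout would disqualify it
])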

III. GRU Examples

1. TensorFlow2 GRUCell example (building each cell and its memory/state manually) - IMDB dataset [binary sentiment classification of movie reviews]

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import time

tf.random.set_seed(22)
np.random.seed(22)

assert tf.__version__.startswith('2.')

batch_size = 500  # number of reviews per training batch

total_words = 10000  # keep only the 10,000 most frequent words
max_review_len = 80  # unified review length: shorter reviews are padded to 80 words, longer ones are truncated
embedding_len = 100  # dimensionality of the vector each word is mapped to

# 1. Load the dataset
(X_train, Y_train), (X_val, Y_val) = keras.datasets.imdb.load_data(num_words=total_words)
print('X_train[0] = {0},\nY_train[0] = {1}'.format(X_train[0], Y_train[0]))
print('X_train.shape = {0},Y_train.shape = {1}------------type(X_train) = {2},type(Y_train) = {3}'.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))

# 2. Preprocess the data
# 2.1 Pad/truncate every review to the same length
X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_review_len)  # unified review length    [b, 80]
X_val = keras.preprocessing.sequence.pad_sequences(X_val, maxlen=max_review_len)  # unified review length
print('X_train.shape = {0},Y_train.shape = {1},tf.reduce_max(Y_train) = {2},tf.reduce_min(Y_train) = {3}'.format(X_train.shape, Y_train.shape, tf.reduce_max(Y_train), tf.reduce_min(Y_train)))
# 2.2 Wrap the training and validation sets into batches
db_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
db_train = db_train.shuffle(1000).batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
db_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
db_val = db_val.batch(batch_size, drop_remainder=True)  # drop_remainder=True drops the last batch if it is smaller than batch_size
print('db_train = {0},len(db_train) = {1}'.format(db_train, len(db_train)))


class MyRNN(keras.Model):
    def __init__(self, output_dim):
        super(MyRNN, self).__init__()
        # ***********************************************************memoryCell***********************************************************
        # [b, 64]
        # memoryCell holds the previous hidden state h_{t-1}, an input when computing the current h_t
        # Chaining two cells (cell01 -> cell02) is what makes the network deep
        self.memoryCell01 = [tf.zeros([batch_size, output_dim])]  # initial state of memoryCell01, shape [b, 64]
        self.memoryCell02 = [tf.zeros([batch_size, output_dim])]  # initial state of memoryCell02, shape [b, 64]
        # ***********************************************************Embedding***********************************************************
        # Maps each review (shape [80]: 80 word indices) to word embeddings (shape [80, 100]: 80 words, each a 100-dim vector)
        # [b, 80] => [b, 80, 100]
        # input_dim: vocabulary size (total number of words kept), 10000
        # input_length: unified review length (number of words per review), 80
        # output_dim: dimensionality of each word vector, 100
        self.embedding = layers.Embedding(input_dim=total_words, input_length=max_review_len, output_dim=embedding_len)
        # ***********************************************************GRUCell layer***********************************************************
        # [b, 80, 100] => [b, 64]
        self.rnn_cell01 = layers.GRUCell(output_dim, dropout=0.2)  # output_dim: dimensionality of the hidden state; dropout guards against overfitting
        self.rnn_cell02 = layers.GRUCell(output_dim, dropout=0.2)
        # ***********************************************************Fully connected layer***********************************************************
        # [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        net(x) or net(x, training=True): train mode
        net(x, training=False): test mode
        :param inputs: [b, 80]
        :param training:
        :return:
        """
        # ***********************************************************Embedding***********************************************************
        # embedding: [b, 80] => [b, 80, 100]
        wordEmbeddings = self.embedding(inputs)  # inputs is one batch of reviews
        print('\nwordEmbeddings.shape = {0}, wordEmbeddings = {1}'.format(wordEmbeddings.shape, wordEmbeddings))
        # rnn cell compute
        # ***********************************************************GRUCell layer***********************************************************
        # [b, 80, 100] => [b, 64]: each review is reduced from [80, 100] to [64]
        memoryCell01 = self.memoryCell01
        memoryCell02 = self.memoryCell02
        wordEmbedding_index = 0
        for wordEmbedding in tf.unstack(wordEmbeddings, axis=1):  # wordEmbedding: [b, 100]; unroll the 80 words of each review along the time axis
            # hidden state out01/out02: [b, 64]
            # The GRU gates update the hidden state h_t = out01 from x_t = wordEmbedding and h_{t-1} = memoryCell01 (see the gate equations after this listing)
            # Unlike the LSTM, the GRU carries no separate cell state: memoryCell01 holds a single tensor, the hidden state h_t
            out01, memoryCell01 = self.rnn_cell01(wordEmbedding, memoryCell01, training=training)  # the returned state replaces memoryCell01;   training=True means training mode, so dropout is active
            # Feed the output out01 of rnn_cell01 into rnn_cell02 to strengthen the feature extraction of the recurrent layer
            # memoryCell02 likewise holds a single tensor, the hidden state h_t
            out02, memoryCell02 = self.rnn_cell02(out01, memoryCell02, training=training)  # the returned state replaces memoryCell02; training=True means training mode, so dropout is active
            if wordEmbedding_index == 0:
                print('wordEmbedding.shape = {0}, wordEmbedding = {1}'.format(wordEmbedding.shape, wordEmbedding))
                print('out01.shape = {0}, memoryCell01[0].shape = {1}, out01 = {2}'.format(out01.shape, memoryCell01[0].shape, out01))
                print('out02.shape = {0}, memoryCell02[0].shape = {1}, out02 = {2}'.format(out02.shape, memoryCell02[0].shape, out02))
            wordEmbedding_index += 1
        # ***********************************************************Fully connected layer***********************************************************
        # out: [b, 64] => [b, 1]
        out_logit = self.outlayer(out02)  # out02 carries the extracted semantics of each review
        print('out_logit.shape = {0}, out_logit = {1}'.format(out_logit.shape, out_logit))
        out_prob = tf.sigmoid(out_logit)  # p(y is pos|x)
        print('out_prob.shape = {0}, out_prob = {1}, {2}'.format(out_prob.shape, out_prob, '\n'))
        return out_prob


def main():
    output_dim = 64     # dimensionality of the hidden state  [b, 100] => [b, 64]
    epochs = 4
    t0 = time.time()
    network = MyRNN(output_dim)
    # from_logits=True is not needed because MyRNN already applies the sigmoid: out_prob = tf.sigmoid(out_logit)
    # metrics=['accuracy'] reports accuracy during training and evaluation
    network.compile(optimizer=keras.optimizers.Adam(0.001),
                    loss=tf.losses.BinaryCrossentropy(),
                    metrics=['accuracy'])
    print('\n***********************************************************Training the network: start***********************************************************')
    network.fit(db_train, epochs=epochs, validation_data=db_val)
    print('***********************************************************Training the network: end***********************************************************')
    print('\n***********************************************************Evaluating the network (already evaluated during training): start***********************************************************')
    network.evaluate(db_val)  # evaluate the model
    print('***********************************************************Evaluating the network (already evaluated during training): end***********************************************************')
    t1 = time.time()
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()
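For reference, the single state tensor carried by the GRU cells is updated through a reset gate and an update gate (one common formulation, added here as a reminder; implementations differ in the sign convention of the update gate z_t):

\begin{aligned}
z_t &= \sigma(W_z x_t + U_z h_{t-1} + b_z) \\
r_t &= \sigma(W_r x_t + U_r h_{t-1} + b_r) \\
\tilde{h}_t &= \tanh(W_h x_t + U_h (r_t \odot h_{t-1}) + b_h) \\
h_t &= (1 - z_t) \odot h_{t-1} + z_t \odot \tilde{h}_t
\end{aligned}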

Output:

X_train[0] = [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32],
Y_train[0] = 1
X_train.shape = (25000,),Y_train.shape = (25000,)------------type(X_train) = <class 'numpy.ndarray'>,type(Y_train) = <class 'numpy.ndarray'>
X_train.shape = (25000, 80),Y_train.shape = (25000,),tf.reduce_max(Y_train) = 1,tf.reduce_min(Y_train) = 0
db_train = <BatchDataset shapes: ((500, 80), (500,)), types: (tf.int32, tf.int64)>,len(db_train) = 50

***********************************************************Training the network: start***********************************************************
Epoch 1/4

wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), memoryCell01[0].shape = (500, 64), out01 = Tensor("my_rnn/gru_cell/add_3:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), memoryCell02[0].shape = (500, 64), out02 = Tensor("my_rnn/gru_cell_1/add_3:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 


wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), memoryCell01[0].shape = (500, 64), out01 = Tensor("my_rnn/gru_cell/add_3:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), memoryCell02[0].shape = (500, 64), out02 = Tensor("my_rnn/gru_cell_1/add_3:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - ETA: 0s - loss: 0.6669 - accuracy: 0.5711
wordEmbeddings.shape = (500, 80, 100), wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
wordEmbedding.shape = (500, 100), wordEmbedding = Tensor("my_rnn/unstack:0", shape=(500, 100), dtype=float32)
out01.shape = (500, 64), memoryCell01[0].shape = (500, 64), out01 = Tensor("my_rnn/gru_cell/add_3:0", shape=(500, 64), dtype=float32)
out02.shape = (500, 64), memoryCell02[0].shape = (500, 64), out02 = Tensor("my_rnn/gru_cell_1/add_3:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - 23s 265ms/step - loss: 0.6657 - accuracy: 0.5727 - val_loss: 0.4395 - val_accuracy: 0.7975
Epoch 2/4
50/50 [==============================] - 11s 217ms/step - loss: 0.3770 - accuracy: 0.8304 - val_loss: 0.3646 - val_accuracy: 0.8424
Epoch 3/4
50/50 [==============================] - 10s 204ms/step - loss: 0.2573 - accuracy: 0.8946 - val_loss: 0.4230 - val_accuracy: 0.8372
Epoch 4/4
50/50 [==============================] - 10s 209ms/step - loss: 0.2107 - accuracy: 0.9183 - val_loss: 0.4979 - val_accuracy: 0.8305
***********************************************************Training the network: end***********************************************************

***********************************************************Evaluating the network (already evaluated during training): start***********************************************************
50/50 [==============================] - 3s 59ms/step - loss: 0.4979 - accuracy: 0.8305
***********************************************************Evaluating the network (already evaluated during training): end***********************************************************
total time cost: 57.23774862289429

Process finished with exit code 0

2、TensorFlow2-GRU example - imdb dataset [movie review sentiment binary classification]

import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import time

tf.random.set_seed(22)
np.random.seed(22)

assert tf.__version__.startswith('2.')

batch_size = 500  # number of sentences per training batch

total_words = 10000  # keep only the most frequent words
max_review_len = 80  # unified sentence length: sentences shorter than 80 words are padded, longer ones are truncated
embedding_len = 100  # dimension of the vector each word is converted to

# 1. Load the dataset
(X_train, Y_train), (X_val, Y_val) = keras.datasets.imdb.load_data(num_words=total_words)
print('X_train[0] = {0},\nY_train[0] = {1}'.format(X_train[0], Y_train[0]))
print('X_train.shape = {0},Y_train.shape = {1}------------type(X_train) = {2},type(Y_train) = {3}'.format(X_train.shape, Y_train.shape, type(X_train), type(Y_train)))

# 2. Preprocess the data
# 2.1 Unify the sentence length
X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_review_len)  # pad/truncate every sentence to max_review_len    [b, 80]
X_val = keras.preprocessing.sequence.pad_sequences(X_val, maxlen=max_review_len)  # pad/truncate every sentence to max_review_len
print('X_train.shape = {0},Y_train.shape = {1},tf.reduce_max(Y_train) = {2},tf.reduce_min(Y_train) = {3}'.format(X_train.shape, Y_train.shape, tf.reduce_max(Y_train), tf.reduce_min(Y_train)))
# 2.2 Build the batched datasets
db_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
db_train = db_train.shuffle(1000).batch(batch_size, drop_remainder=True)  # drop_remainder=True discards the last batch if it is smaller than batch_size
db_val = tf.data.Dataset.from_tensor_slices((X_val, Y_val))
db_val = db_val.batch(batch_size, drop_remainder=True)  # drop_remainder=True discards the last batch if it is smaller than batch_size
print('db_train = {0},len(db_train) = {1}'.format(db_train, len(db_train)))
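# With 25000 training reviews and batch_size = 500 (drop_remainder=True), this pipeline
# yields 25000 / 500 = 50 batches per epoch, which matches len(db_train) = 50 in the output below.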


class MyRNN(keras.Model):
    def __init__(self, output_dim):
        super(MyRNN, self).__init__()
        # ***********************************************************Embedding***********************************************************
        # transform text to embedding representation
        # transform each sentence (shape [80, 1]: 80 words per sentence, 1 index per word) into word embeddings (shape [80, 100]: 80 words per sentence, a 100-dim embedding per word)
        # [b, 80, 1] => [b, 80, 100]
        # input_dim: vocabulary size, i.e. the total number of words kept; 10000
        # input_length: unified sentence length (number of words per sentence); 80
        # output_dim: dimension of the vector each word is converted to; 100
        self.embedding = layers.Embedding(input_dim=total_words, input_length=max_review_len, output_dim=embedding_len)
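        # Note: the embedding table alone holds total_words * embedding_len = 10000 * 100 = 1,000,000 trainable parameters.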
        # ***********************************************************GRU network: two stacked GRU layers***********************************************************
        # [b, 80, 100]=>[b, 64]
        self.rnn = keras.Sequential([
            # output_dim: dimensionality of the output space (size of the hidden state); dropout helps prevent overfitting
            # return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
            # unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used.
            # Unrolling can speed-up a RNN, although it tends to be more memory-intensive. Unrolling is only suitable for short sequences.
            layers.GRU(output_dim, dropout=0.5, return_sequences=True, unroll=True),
            layers.GRU(output_dim, dropout=0.5, unroll=True)
        ])
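        # Shape flow: return_sequences=True makes the first GRU emit the full sequence [b, 80, 64];
        # the second GRU consumes that sequence and returns only its final hidden state [b, 64].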
        # ***********************************************************Fully connected layer***********************************************************
        # [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """
        net(x) or net(x, training=True): train mode
        net(x, training=False): test mode
        :param inputs: [b, 80]
        :param training: train/test flag (controls dropout behaviour)
        :return: probability that each review is positive, shape [b, 1]
        """
        # ***********************************************************Embedding***********************************************************
        # embedding: [b, 80, 1] => [b, 80, 100]
        x_wordEmbeddings = self.embedding(inputs)  # inputs is one batch of tokenized sentences (word indices)
        print('\nx_wordEmbeddings.shape = {0}, x_wordEmbeddings = {1}'.format(x_wordEmbeddings.shape, x_wordEmbeddings))
        # ***********************************************************GRU forward pass***********************************************************
        out = self.rnn(x_wordEmbeddings)  # x: [b, 80, 100] => [b, 64]
        print('out.shape = {0}, out = {1}'.format(out.shape, out))
        out_logit = self.outlayer(out)  # hidden state => logit for positive/negative   out: [b, 64] => [b, 1]
        print('out_logit.shape = {0}, out_logit = {1}'.format(out_logit.shape, out_logit))
        out_prob = tf.sigmoid(out_logit)  # p(y is pos|x)
        print('out_prob.shape = {0}, out_prob = {1}, {2}'.format(out_prob.shape, out_prob, '\n'))
        return out_prob


def main():
    output_dim = 64
    epochs = 4
    t0 = time.time()
    network = MyRNN(output_dim)
    # from_logits=True is not needed here because MyRNN() already applies the sigmoid activation: out_prob = tf.sigmoid(out_logit)
    # metrics=['accuracy'] reports accuracy during training and evaluation
    network.compile(optimizer=keras.optimizers.Adam(0.001),
                    loss=tf.losses.BinaryCrossentropy(),
                    metrics=['accuracy'])
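    # A common alternative (a sketch, not used in this example): have call() return out_logit without
    # the sigmoid and let the loss apply it in a numerically stable way:
    #   network.compile(optimizer=keras.optimizers.Adam(0.001),
    #                   loss=tf.losses.BinaryCrossentropy(from_logits=True),
    #                   metrics=['accuracy'])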
    print('\n***********************************************************Train network: start***********************************************************')
    network.fit(db_train, epochs=epochs, validation_data=db_val)
    print('***********************************************************Train network: end***********************************************************')
    print('\n***********************************************************Evaluate network (already evaluated during training): start***********************************************************')
    network.evaluate(db_val)  # evaluate the model
    print('***********************************************************Evaluate network (already evaluated during training): end***********************************************************')
    t1 = time.time()
    print('total time cost:', t1 - t0)


if __name__ == '__main__':
    main()

Printed output:

X_train[0] = [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32],
Y_train[0] = 1
X_train.shape = (25000,),Y_train.shape = (25000,)------------type(X_train) = <class 'numpy.ndarray'>,type(Y_train) = <class 'numpy.ndarray'>
X_train.shape = (25000, 80),Y_train.shape = (25000,),tf.reduce_max(Y_train) = 1,tf.reduce_min(Y_train) = 0
db_train = <BatchDataset shapes: ((500, 80), (500,)), types: (tf.int32, tf.int64)>,len(db_train) = 50

***********************************************************Train network: start***********************************************************
WARNING:tensorflow:Layer gru will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU
WARNING:tensorflow:Layer gru_1 will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU
Epoch 1/4

x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/gru_1/gru_cell_1/add_319:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 


x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/gru_1/gru_cell_1/add_319:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - ETA: 0s - loss: 0.6784 - accuracy: 0.5487
x_wordEmbeddings.shape = (500, 80, 100), x_wordEmbeddings = Tensor("my_rnn/embedding/embedding_lookup/Identity:0", shape=(500, 80, 100), dtype=float32)
out.shape = (500, 64), out = Tensor("my_rnn/sequential/gru_1/gru_cell_1/add_319:0", shape=(500, 64), dtype=float32)
out_logit.shape = (500, 1), out_logit = Tensor("my_rnn/dense/BiasAdd:0", shape=(500, 1), dtype=float32)
out_prob.shape = (500, 1), out_prob = Tensor("my_rnn/Sigmoid:0", shape=(500, 1), dtype=float32), 

50/50 [==============================] - 26s 304ms/step - loss: 0.6775 - accuracy: 0.5501 - val_loss: 0.4695 - val_accuracy: 0.7734
Epoch 2/4
50/50 [==============================] - 11s 223ms/step - loss: 0.4109 - accuracy: 0.8152 - val_loss: 0.3752 - val_accuracy: 0.8306
Epoch 3/4
50/50 [==============================] - 11s 213ms/step - loss: 0.2942 - accuracy: 0.8750 - val_loss: 0.3690 - val_accuracy: 0.8438
Epoch 4/4
50/50 [==============================] - 11s 228ms/step - loss: 0.2450 - accuracy: 0.9005 - val_loss: 0.3863 - val_accuracy: 0.8379
***********************************************************Train network: end***********************************************************

***********************************************************Evaluate network (already evaluated during training): start***********************************************************
50/50 [==============================] - 3s 57ms/step - loss: 0.3863 - accuracy: 0.8379
***********************************************************Evaluate network (already evaluated during training): end***********************************************************
total time cost: 62.06642150878906

Process finished with exit code 0
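
After training, the fitted model can score new reviews directly. Below is a minimal inference sketch (not part of the original script, so treat the helper name and tokenization details as assumptions): it encodes a raw review with the same index convention keras.datasets.imdb.load_data uses by default (word indices shifted by 3; 1 = start token, 2 = out-of-vocabulary), pads it to max_review_len, and asks the trained network for the probability that the review is positive. It relies on the module-level total_words and max_review_len from the script above, with the trained network passed in (for example from inside main() after network.evaluate(db_val)).

def predict_review(network, text):
    """Score a raw review with the trained model; returns P(review is positive)."""
    word_index = keras.datasets.imdb.get_word_index()  # word -> rank mapping used by the imdb dataset
    ids = [1]  # start token (load_data defaults: start_char=1, oov_char=2, index_from=3)
    for w in text.lower().split():
        rank = word_index.get(w)
        ids.append(rank + 3 if rank is not None and rank + 3 < total_words else 2)  # 2 = <UNK>
    x = keras.preprocessing.sequence.pad_sequences([ids], maxlen=max_review_len)  # pad/truncate to [1, 80]
    return float(network.predict(x)[0][0])

# Example usage (e.g. at the end of main()):
#     print('P(positive) =', predict_review(network, 'this movie was a wonderful surprise and i loved it'))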


Reproduced from blog.csdn.net/u013250861/article/details/113732848