TF2 RNN篇之情感分析实战【三种方式解决】
RNN层
在tensorflow2的框架下,SimpleRNN层可以用keras.SimpleRNN或keras.SimpleRNNCell来表示,keras.SimpleRNN是高级的封装类,不需要了解rnn的原理便可以使用;keras.SimpleRNNCell是较接近底层的类,需要自己去更新out和state。我们先使用keras.SimpleRNNCell来实现,以便理解RNN的内部原理
单层SimpleRNNCell训练
加载必要的库
# Silence TensorFlow C++ INFO/WARNING logs (must be set before importing tf).
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
# Fix the random seeds so training runs are reproducible.
tf.random.set_seed(2233)
np.random.seed(2233)
# This tutorial relies on the TF2 API surface (tf.random.set_seed, tf.keras).
assert tf.__version__.startswith('2.')
加载数据集并构建tensor数据
数据集我们使用keras模块下自带的数据集,加载数据集的时候有一个参数num_words,这个参数的含义是加载单词的数量,因为单词库里有很多的生僻词但是这些生僻词我们并不需要只要加载10000个常用单词即可,这样虽然会影响对部分单词的识别但是不影响主要的意思。
# Vocabulary size: keep only the 10000 most frequent words; rarer words are
# mapped to an out-of-vocabulary index by the IMDB loader.
total_words = 10000
batchsz = 128
# Dimensionality of the learned word-embedding vectors.
embedding_len = 100
# the max length of each sentence
max_review_len = 80
# select 10000 frequency words
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b,80]
# x_test : [b,80]
# Pad / truncate every review to exactly max_review_len tokens.
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)
# build train dataset and test dataset
# drop_remainder=True keeps every batch exactly `batchsz` rows — the model's
# initial RNN state is statically sized [batchsz, units], so a short final
# batch would not match it.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
搭建网络
单层的网络搭建还是比较简单的,不理解意思的可以去看看我之前的几篇博客
class MyRNN(keras.Model):
    """Single-layer SimpleRNNCell sentiment classifier.

    Input : integer token ids of shape [b, 80].
    Output: probability that the review is positive, shape [b, 1].
    """

    def __init__(self, units):
        super(MyRNN, self).__init__()
        # Zero initial hidden state, shape [b, units]; wrapped in a list
        # because SimpleRNNCell expects a list of state tensors.
        self.state0 = [tf.zeros([batchsz, units])]
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # Single recurrent cell; dropout is only active in training mode.
        # (The original also built a second, never-used cell here — removed
        # as dead code; the stacked version below defines it properly.)
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        # fc, [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the RNN over one batch of reviews.

        net(x) / net(x, training=True): train mode (dropout on)
        net(x, training=False): test mode (dropout off)
        :param inputs: token ids, shape [b, 80]
        :param training: forwarded to the RNN cell to control dropout
        :return: sigmoid probability, shape [b, 1]
        """
        # [b, 80]
        x = inputs
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # rnn cell compute: manually unroll over the 80 time steps
        # [b, 80, 100] => [b, 64]
        state0 = self.state0
        for word in tf.unstack(x, axis=1):  # word: [b, 100]
            # h1 = x @ Wxh + h @ Whh  (for SimpleRNNCell, out == new state)
            # out0: [b, 64]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
        # out: [b, 64] => [b, 1]  (last time step's output)
        x = self.outlayer(out0)
        # p(y is pos | x)
        prob = tf.sigmoid(x)
        return prob
进行训练
训练我们就使用.fit 的方法训练即可,主要是要看看网络的效果怎么样
def main():
    """Build, train and evaluate the single-cell RNN classifier."""
    hidden_units = 64
    n_epochs = 4
    net = MyRNN(hidden_units)
    # Binary cross-entropy on the sigmoid output; track accuracy.
    net.compile(
        optimizer=keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
        experimental_run_tf_function=False,
    )
    net.fit(db_train, epochs=n_epochs, validation_data=db_test)
    net.evaluate(db_test)
两层SimpleRNNCell训练
两层SimpleRNNCell,增加了一层,第二层在第一层的state的基础上进行更新,更改的代码很少
我贴上全部的代码可以自己对比一下
# Silence TensorFlow C++ INFO/WARNING logs (must be set before importing tf).
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
# Fix the random seeds so training runs are reproducible.
tf.random.set_seed(2233)
np.random.seed(2233)
assert tf.__version__.startswith('2.')
# Vocabulary size: keep only the 10000 most frequent words.
total_words = 10000
batchsz = 128
# Dimensionality of the learned word-embedding vectors.
embedding_len = 100
# the max length of each sentence
max_review_len = 80
# select 10000 frequency words
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b,80]
# x_test : [b,80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)
# build train dataset and test dataset
# drop_remainder=True keeps every batch exactly `batchsz` rows to match the
# statically sized initial RNN states.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape:', x_test.shape)
class MyRNN(keras.Model):
    """Two-layer (stacked) SimpleRNNCell sentiment classifier.

    At every time step the second cell consumes the first cell's output.
    Input : integer token ids of shape [b, 80].
    Output: probability that the review is positive, shape [b, 1].
    """

    def __init__(self, units):
        super(MyRNN, self).__init__()
        # Zero initial hidden states, one list per recurrent layer: [b, units].
        self.state0 = [tf.zeros([batchsz, units])]
        self.state1 = [tf.zeros([batchsz, units])]
        # transform text to embedding representation
        # [b, 80] => [b, 80, 100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # [b, 80, 100], h_dim: units
        # Stacked SimpleRNN cells; dropout active only in training mode.
        self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.5)
        self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.5)
        # fc, [b, 80, 100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the stacked RNN over one batch of reviews.

        net(x) / net(x, training=True): train mode (dropout on)
        net(x, training=False): test mode (dropout off)
        :param inputs: token ids, shape [b, 80]
        :param training: forwarded to both RNN cells to control dropout
        :return: sigmoid probability, shape [b, 1]
        """
        # [b, 80]
        x = inputs
        # embedding: [b, 80] => [b, 80, 100]
        x = self.embedding(x)
        # rnn cell compute: manually unroll over the 80 time steps
        # [b, 80, 100] => [b, 64]
        state0 = self.state0
        state1 = self.state1
        for word in tf.unstack(x, axis=1):  # word: [b, 100]
            # h1 = x @ Wxh + h @ Whh  (for SimpleRNNCell, out == new state)
            # out0: [b, 64]
            out0, state0 = self.rnn_cell0(word, state0, training=training)
            # BUG FIX: forward `training` here too — the original omitted it,
            # so the second cell's dropout was not switched by train/test mode.
            out1, state1 = self.rnn_cell1(out0, state1, training=training)
        # out: [b, 64] => [b, 1]  (last time step's output of the top layer)
        x = self.outlayer(out1)
        # p(y is pos | x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build, train and evaluate the two-cell stacked RNN classifier."""
    hidden_units = 64
    n_epochs = 4
    net = MyRNN(hidden_units)
    # Binary cross-entropy on the sigmoid output; track accuracy.
    net.compile(
        optimizer=keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
        experimental_run_tf_function=False,
    )
    net.fit(db_train, epochs=n_epochs, validation_data=db_test)
    net.evaluate(db_test)


if __name__ == '__main__':
    main()
用SimpleRNN实现
SimpleRNN是高级的封装类,只需要像搭积木一样把网络搭好,然后传参进去就可以了
# 构建
self.rnn = keras.Sequential([
layers.SimpleRNN(units,dropout=0.5,return_sequences=True,unroll=True),
layers.SimpleRNN(units,dropout=0.5,unroll=True)
])
# 训练
x =self.rnn(x)
全部代码:
# Silence TensorFlow C++ INFO/WARNING logs (must be set before importing tf).
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
# Let the GPU allocator grow on demand instead of grabbing all memory
# up front (TF1 compat API used here for that setting).
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
# Fix the random seeds so training runs are reproducible.
tf.random.set_seed(2233)
np.random.seed(2233)
assert tf.__version__.startswith('2.')
# Vocabulary size: keep only the 10000 most frequent words.
total_words = 10000
batchsz = 128
# Dimensionality of the learned word-embedding vectors.
embedding_len = 100
# the max length of each sentence
max_review_len = 80
# select 10000 frequency words
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
# x_train: [b,80]
# x_test : [b,80]
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)
# build train dataset and test dataset
# drop_remainder=True keeps every batch exactly `batchsz` rows.
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.batch(batchsz, drop_remainder=True)
print('x_train shape:', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
print('x_test shape', x_test.shape)
class MyRNN(keras.Model):
    """Sentiment classifier built from the high-level SimpleRNN layer.

    keras.layers.SimpleRNN manages its own per-step hidden state, so this
    version needs no manual state tensors or time-step loop.
    Input : integer token ids of shape [b, 80].
    Output: probability that the review is positive, shape [b, 1].
    """

    def __init__(self, units):
        super(MyRNN, self).__init__()
        # (The original also created self.state0/self.state1 here — removed
        # as dead code; the SimpleRNN layers never read them.)
        # transform text to embedding representation
        # [b,80] ==> [b,80,100]
        self.embedding = layers.Embedding(total_words, embedding_len,
                                          input_length=max_review_len)
        # [b,80,100], h_dim: units
        # return_sequences=True on the first layer so the second layer sees
        # the full [b, 80, units] sequence; unroll=True trades memory for
        # speed, which is safe here because the length is fixed at 80.
        self.rnn = keras.Sequential([
            layers.SimpleRNN(units, dropout=0.5, return_sequences=True, unroll=True),
            layers.SimpleRNN(units, dropout=0.5, unroll=True)
        ])
        # fc,[b,80,100] => [b, 64] => [b, 1]
        self.outlayer = layers.Dense(1)

    def call(self, inputs, training=None):
        """Run the stacked SimpleRNN over one batch of reviews.

        net(x) / net(x, training=True): train mode (dropout on)
        net(x, training=False): test mode (dropout off)
        :param inputs: token ids, shape [b, 80]
        :param training: controls dropout in the SimpleRNN layers
        :return: sigmoid probability, shape [b, 1]
        """
        # [b, 80]
        x = inputs
        # embedding: [b,80] => [b,80, 100]
        x = self.embedding(x)
        # rnn compute: the layer handles the time loop internally
        # [b,80,100] => [b,80,64] => [b,64]
        # Pass `training` explicitly rather than relying on Keras's implicit
        # call-context propagation.
        x = self.rnn(x, training=training)
        # out:[b,64] => [b,1]
        x = self.outlayer(x)
        # p(y is pos | x)
        prob = tf.sigmoid(x)
        return prob
def main():
    """Build, train and evaluate the SimpleRNN-layer classifier."""
    hidden_units = 64
    n_epochs = 4
    net = MyRNN(hidden_units)
    # Binary cross-entropy on the sigmoid output; track accuracy.
    net.compile(
        optimizer=keras.optimizers.Adam(0.001),
        loss=tf.losses.BinaryCrossentropy(),
        metrics=['accuracy'],
        experimental_run_tf_function=False,
    )
    net.fit(db_train, epochs=n_epochs, validation_data=db_test)
    net.evaluate(db_test)


if __name__ == '__main__':
    main()
参考书籍: TensorFlow 深度学习 — 龙龙老师