卷积神经网络:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist=input_data.read_data_sets('MNIST_data',one_hot=True)
def weight_variable(shape):
    """Create a trainable weight variable of the given shape.

    The initial values are drawn with ``tf.truncated_normal`` using a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a trainable bias variable of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve ``x`` with the filter bank ``W`` using TensorFlow's 2-D convolution.

    The stride is 1 in every position of ``strides``; per the original note
    the first and last entries are left at 1 and the two middle entries are
    the step taken in the x and y directions. Padding mode is 'SAME'.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window moved in steps of 2, 'SAME' padding."""
    window = [1, 2, 2, 1]  # 2x2 pooling kernel
    step = [1, 2, 2, 1]    # stride of 2 in both spatial directions
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# Flattened input images: 784 = 28 * 28 values per example.
xs = tf.placeholder(tf.float32, shape=[None, 784])
# Target labels, 10 values per example.
ys = tf.placeholder(tf.float32, shape=[None, 10])
# Keep probability handed to the dropout layer; supplied at run time.
keep_prob = tf.placeholder(tf.float32)
# Back to image layout. -1 leaves the number of images to be inferred from the
# data; the trailing 1 is the channel count (greyscale input, an RGB image
# would use 3).
x_image = tf.reshape(xs, shape=[-1, 28, 28, 1])
# First convolution + pooling stage.
# 5x5 filters; 1 input channel (greyscale) expanded to 32 output channels.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# 'SAME' padding with stride 1 keeps the 28x28 extent; only the depth grows,
# giving 28x28x32.
conv1_pre_activation = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)
h_pool1 = max_pool_2x2(h_conv1)  # 14x14x32

# Second convolution + pooling stage: 32 channels in, 64 channels out.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)  # 14x14x64
h_pool2 = max_pool_2x2(h_conv2)  # 7x7x64
# First fully connected layer.
# Flatten the pooled feature maps of each example:
# [n_samples, 7, 7, 64] -> [n_samples, 7*7*64].
h_pool2_flat = tf.reshape(h_pool2, shape=[-1, 7 * 7 * 64])
# 7*7*64 inputs mapped to 1024 outputs.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
fc1_pre_activation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)
# Dropout governed by the keep_prob placeholder.
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# Second fully connected (output) layer: 1024 features -> 10 class scores.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
# Class probabilities, kept under the name the rest of the script uses.
prediction = tf.nn.softmax(logits)
# Cross-entropy is computed from the raw logits rather than from
# tf.log(prediction): taking the log of a softmax output that has underflowed
# to 0 yields -inf/NaN and poisons training. The fused op evaluates the same
# quantity (mean over the batch of -sum(ys * log softmax)) in a stable way.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits))
# Adam optimizer with a learning rate of 0.0001.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
def compute_accuracy(v_xs, v_ys):
    """Return the fraction of examples in ``v_xs`` classified correctly.

    Runs the module-level ``prediction`` tensor through the module-level
    session ``sess`` with ``keep_prob`` fed as 1, then compares the arg-max of
    each prediction row with the arg-max of the matching row of ``v_ys``.

    The comparison is done with NumPy on the fetched values. Building
    ``tf.equal`` / ``tf.reduce_mean`` ops here would add new nodes to the
    graph on every call, so the graph would keep growing during training.

    Args:
        v_xs: batch of inputs to feed into ``xs``.
        v_ys: matching label rows (one row per input).

    Returns:
        The accuracy as a ``numpy.float32`` scalar.
    """
    # Local import: this script section only imports tensorflow at file level.
    import numpy as np

    y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
    hits = np.argmax(y_pre, 1) == np.argmax(v_ys, 1)
    return np.float32(np.mean(hits))
# Train for 1001 steps on mini-batches of 100 examples, reporting accuracy on
# the first 1000 test examples every 50 steps.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1001):
        images, labels = mnist.train.next_batch(100)
        train_feed = {xs: images, ys: labels, keep_prob: 0.5}
        sess.run(train_step, feed_dict=train_feed)
        if step % 50:
            continue
        print(compute_accuracy(mnist.test.images[:1000], mnist.test.labels[:1000]))
Saver 保存读取(变量):
保存:
import tensorflow as tf
import numpy as np
import os

# Save to file.
# Remember to define the same dtype and shape when restoring.
W = tf.Variable([[1, 2, 3], [3, 4, 5]], dtype=tf.float32, name='weights')
b = tf.Variable([[1, 2, 3]], dtype=tf.float32, name='biases')
init = tf.global_variables_initializer()

# Save to path: my_net/save_net.ckpt
saver = tf.train.Saver()
ckpt_path = "my_net/save_net.ckpt"
# Make sure the target directory exists first; saving into a missing parent
# directory can fail instead of creating it.
ckpt_dir = os.path.dirname(ckpt_path)
if not os.path.isdir(ckpt_dir):
    os.makedirs(ckpt_dir)

with tf.Session() as sess:
    sess.run(init)
    save_path = saver.save(sess, ckpt_path)
    print("Save to path: ", save_path)
读取:
import tensorflow as tf
import numpy as np
# Build containers for W and b first; their contents are overwritten by the
# checkpoint.
w_container = np.arange(6).reshape((2, 3))
b_container = np.arange(3).reshape((1, 3))
W = tf.Variable(w_container, dtype=tf.float32, name="weights")
b = tf.Variable(b_container, dtype=tf.float32, name="biases")

# No init op is run here: the values come from the checkpoint.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Load the saved variables.
    saver.restore(sess, "my_net/save_net.ckpt")
    print("weights:", sess.run(W))
    print("biases:", sess.run(b))
"""
weights: [[ 1. 2. 3.]
[ 3. 4. 5.]]
biases: [[ 1. 2. 3.]]
"""
循环神经网络:
RNN:Recurrent Neural Networks
对于序列(有先后顺序的)数据问题,RNN 是一个比较好的解决方案。
RNN在训练中很容易发生梯度爆炸和梯度消失,这导致训练时梯度不能在较长序列中一直传递下去,从而使RNN很难处理长距离的依赖。有一种改进之后的循环神经网络:长短时记忆网络(Long Short Term Memory Network, LSTM),它成功地解决了原始循环神经网络的缺陷,成为当前最流行的RNN,在语音识别、图片描述、自然语言处理等许多领域中成功应用。
RNN LSTM 循环神经网络 (分类例子):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
tf.set_random_seed(1)  # fix the graph-level random seed

# Load the data.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Hyperparameters.
lr = 0.001                # learning rate
training_iters = 100000   # upper bound on the number of training examples seen
batch_size = 128
n_steps = 28              # time steps (28 rows)
n_inputs = 28             # MNIST data input (28 columns)
n_hidden_units = 128      # neurons in hidden layer
n_classes = 10            # MNIST classes (0-9 digits)

# Placeholders for the inputs and the targets.
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, shape=[None, n_classes])

# Initial values of the weights and biases. The variables are created in the
# same order as before (weights 'in', 'out', then biases 'in', 'out').
weights = {}
weights['in'] = tf.Variable(tf.random_normal([n_inputs, n_hidden_units]))    # shape (28, 128)
weights['out'] = tf.Variable(tf.random_normal([n_hidden_units, n_classes]))  # shape (128, 10)

biases = {}
biases['in'] = tf.Variable(tf.constant(0.1, shape=[n_hidden_units]))  # shape (128,)
biases['out'] = tf.Variable(tf.constant(0.1, shape=[n_classes]))      # shape (10,)
def RNN(X, weights, biases):
    """Build the LSTM classifier graph and return the class scores.

    The network has three parts: an input projection, an LSTM cell unrolled
    with ``tf.nn.dynamic_rnn``, and an output projection applied to the
    output of the last time step.

    Args:
        X: input tensor laid out as (batch, n_steps, n_inputs).
        weights: dict with 'in' (n_inputs x n_hidden_units) and
            'out' (n_hidden_units x n_classes) variables.
        biases: dict with matching 'in' and 'out' bias variables.

    Returns:
        Tensor of unnormalised class scores, one row per example.
    """
    # Input projection. X is 3-D, so fold batch and time together to use a
    # plain matrix multiply: (batch * n_steps, n_inputs).
    X = tf.reshape(X, [-1, n_inputs])
    # (batch * n_steps, n_hidden_units)
    X_in = tf.matmul(X, weights['in']) + biases['in']
    # Back to 3-D: (batch, n_steps, n_hidden_units).
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    # LSTM cell; its state is a (c_state, h_state) tuple.
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units, forget_bias=1.0, state_is_tuple=True)
    # Passing dtype instead of a zero_state built from the module-level
    # batch_size lets dynamic_rnn create the all-zero initial state for
    # whatever batch size is actually fed, so the graph is no longer tied to
    # batches of exactly `batch_size` examples.
    # time_major=False because the inputs are (batch, steps, inputs);
    # (steps, batch, inputs) would need time_major=True.
    outputs, _ = tf.nn.dynamic_rnn(lstm_cell, X_in, dtype=tf.float32, time_major=False)

    # Output projection on the last time step's output (per the original
    # note, this equals the h_state of the final state in this example).
    last_output = outputs[:, -1, :]
    return tf.matmul(last_output, weights['out']) + biases['out']
# Loss, optimizer and accuracy ops on top of the RNN scores.
pred = RNN(x, weights, biases)
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)
train_op = tf.train.AdamOptimizer(lr).minimize(cost)
predicted_class = tf.argmax(pred, 1)
target_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, target_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.global_variables_initializer()

# Train the RNN, printing the accuracy on the current training batch every
# 20 steps.
with tf.Session() as sess:
    sess.run(init)
    step = 0
    while step * batch_size < training_iters:
        images, labels = mnist.train.next_batch(batch_size)
        feed = {
            x: images.reshape([batch_size, n_steps, n_inputs]),
            y: labels,
        }
        sess.run([train_op], feed_dict=feed)
        if step % 20 == 0:
            print(sess.run(accuracy, feed_dict=feed))
        step += 1
RNN LSTM 循环神经网络 (回归例子):
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Regression with an RNN: a generated sin curve is used to predict a cos curve.
BATCH_START = 0     # start index used when building the next batch of data
TIME_STEPS = 20     # number of time steps for backpropagation through time
BATCH_SIZE = 50
INPUT_SIZE = 1      # size of each sin input sample
OUTPUT_SIZE = 1     # size of each cos output sample
CELL_SIZE = 10      # hidden unit size of the RNN
LR = 0.006          # learning rate
def get_batch():
    """Generate the next batch of sin/cos training data.

    Takes ``TIME_STEPS * BATCH_SIZE`` consecutive integers starting at the
    module-level ``BATCH_START``, arranges them as (BATCH_SIZE, TIME_STEPS),
    and then advances ``BATCH_START`` by ``TIME_STEPS`` for the next call.

    Returns:
        A list ``[seq, res, xs]`` where ``seq`` is sin(xs) and ``res`` is
        cos(xs), both with a trailing axis added to give
        (batch, step, input), and ``xs`` is the 2-D array of sample positions.
    """
    global BATCH_START
    stop = BATCH_START + TIME_STEPS * BATCH_SIZE
    # xs shape: (50 batch, 20 steps)
    xs = np.arange(BATCH_START, stop).reshape((BATCH_SIZE, TIME_STEPS))
    BATCH_START += TIME_STEPS
    seq, res = np.sin(xs), np.cos(xs)
    return [seq[:, :, np.newaxis], res[:, :, np.newaxis], xs]
class LSTMRNN(object):
    """LSTM regression network.

    The graph is assembled in ``__init__`` in this order: input placeholders,
    input projection, LSTM cell, output projection, cost, and an Adam training
    op that uses the module-level learning rate ``LR``.
    """

    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size):
        self.n_steps, self.input_size, self.output_size = n_steps, input_size, output_size
        self.cell_size, self.batch_size = cell_size, batch_size

        with tf.name_scope('inputs'):
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size], name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size], name='ys')

        # Each stage is built inside its own variable scope, in this order.
        stages = (
            ('in_hidden', self.add_input_layer),
            ('LSTM_cell', self.add_cell),
            ('out_hidden', self.add_output_layer),
        )
        for scope_name, build_stage in stages:
            with tf.variable_scope(scope_name):
                build_stage()

        with tf.name_scope('cost'):
            self.compute_cost()
        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(LR).minimize(self.cost)

    def add_input_layer(self):
        """Project every time step of ``self.xs`` to ``cell_size`` features."""
        # (batch * n_steps, input_size)
        flat_inputs = tf.reshape(self.xs, [-1, self.input_size], name='2_2D')
        # weights: (input_size, cell_size); biases: (cell_size,)
        in_weights = self._weight_variable([self.input_size, self.cell_size])
        in_biases = self._bias_variable([self.cell_size])
        with tf.name_scope('Wx_plus_b'):
            # (batch * n_steps, cell_size)
            projected = tf.matmul(flat_inputs, in_weights) + in_biases
        # Restore the time axis: (batch, n_steps, cell_size).
        self.l_in_y = tf.reshape(projected, [-1, self.n_steps, self.cell_size], name='2_3D')

    def add_cell(self):
        """Create the LSTM cell and unroll it over the projected inputs."""
        cell = tf.contrib.rnn.BasicLSTMCell(self.cell_size, forget_bias=1.0, state_is_tuple=True)
        with tf.name_scope('initial_state'):
            # All-zero state sized for self.batch_size examples.
            self.cell_init_state = cell.zero_state(self.batch_size, dtype=tf.float32)
        rnn_result = tf.nn.dynamic_rnn(
            cell, self.l_in_y, initial_state=self.cell_init_state, time_major=False)
        self.cell_outputs, self.cell_final_state = rnn_result

    def add_output_layer(self):
        """Project every cell output to ``output_size`` values (``self.pred``)."""
        # (batch * steps, cell_size)
        flat_outputs = tf.reshape(self.cell_outputs, [-1, self.cell_size], name='2_2D')
        out_weights = self._weight_variable([self.cell_size, self.output_size])
        out_biases = self._bias_variable([self.output_size])
        with tf.name_scope('Wx_plus_b'):
            # (batch * steps, output_size)
            self.pred = tf.matmul(flat_outputs, out_weights) + out_biases

    def compute_cost(self):
        """Compute the loss of one batch and store it in ``self.cost``.

        Squared errors between the flattened predictions and targets are
        summed and then divided by ``batch_size``.
        """
        flat_pred = tf.reshape(self.pred, [-1], name='reshape_pred')
        flat_target = tf.reshape(self.ys, [-1], name='reshape_target')
        unit_weights = tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [flat_pred],
            [flat_target],
            [unit_weights],
            average_across_timesteps=True,
            softmax_loss_function=self.ms_error,
            name='losses',
        )
        with tf.name_scope('average_cost'):
            # Sum all the losses first, then average over the batch size.
            total_loss = tf.reduce_sum(losses, name='losses_sum')
            self.cost = tf.div(total_loss, self.batch_size, name='average_cost')
            tf.summary.scalar('cost', self.cost)

    def ms_error(self, labels, logits):
        """Return the element-wise squared difference of labels and logits."""
        difference = tf.subtract(labels, logits)
        return tf.square(difference)

    def _weight_variable(self, shape, name='weights'):
        """Get a variable initialised from a normal distribution (mean 0, stddev 1)."""
        normal_init = tf.random_normal_initializer(mean=0., stddev=1.)
        return tf.get_variable(shape=shape, initializer=normal_init, name=name)

    def _bias_variable(self, shape, name='biases'):
        """Get a variable initialised to the constant 0.1."""
        constant_init = tf.constant_initializer(0.1)
        return tf.get_variable(shape=shape, initializer=constant_init, name=name)
# Train the LSTM RNN.
if __name__ == '__main__':
    # Build the LSTMRNN model.
    model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)
    # The context manager closes the session (and frees its resources) when
    # training finishes or an error interrupts it.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        plt.ion()   # interactive mode for continuous plotting
        plt.show()

        state = None  # final cell state of the previous step, once there is one
        # Train for 200 steps.
        for i in range(200):
            seq, res, xs = get_batch()  # fetch the next batch of data
            feed_dict = {
                model.xs: seq,
                model.ys: res,
            }
            if state is not None:
                # Carry the previous final state over as the initial state so
                # the state stays continuous across batches; the first step
                # uses the model's own zero state.
                feed_dict[model.cell_init_state] = state

            # One training step.
            _, cost, state, pred = sess.run(
                [model.train_op, model.cost, model.cell_final_state, model.pred],
                feed_dict=feed_dict)

            # Plot the target (red) against the prediction (dashed blue) for
            # the first sequence of the batch.
            plt.plot(xs[0, :], res[0].flatten(), 'r', xs[0, :], pred.flatten()[:TIME_STEPS], 'b--')
            plt.ylim((-1.2, 1.2))
            plt.draw()
            plt.pause(0.3)  # refresh every 0.3 s

            # Print the cost every 20 steps.
            if i % 20 == 0:
                print('cost: ', round(cost, 4))
cost: 6.8876
cost: 0.3297
cost: 0.0368
cost: 0.0347
cost: 0.0096
cost: 0.0039
cost: 0.0024
cost: 0.0044
cost: 0.0069
cost: 0.0009
拟合过程:
->>