nn baseline (last updated: 2019/10/23)

Standard Neural Network Framework (Standard NN)

Basic training framework

# Import libraries
%matplotlib notebook
import tensorflow as tf
from time import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
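
The examples below assume an MNIST-style dataset (784-dimensional inputs, 10 one-hot classes); the original notes do not show data loading. A minimal sketch using TF1's bundled helper:

# Assumed data loading (not shown in the original notes)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)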

# Define a fully connected layer
def fcn_layer(inputs,input_dim,output_dim,activation=None):
	W=tf.Variable(tf.truncated_normal([input_dim,output_dim],stddev=0.1))# initialize W with truncated-normal random values
	b=tf.Variable(tf.zeros([output_dim]))# initialize b with zeros
	XWb=tf.matmul(inputs,W)+b
	# apply the activation function, if specified
	if activation is None:
		outputs=XWb
	else:
		outputs=activation(XWb)
	return outputs

# Dimension hyperparameters
X_NN=784
H1_NN=256
H2_NN=64
Y_dim=10

# Input layer
x=tf.placeholder(tf.float32,[None,X_NN],name="X")

# Hidden layer 1
h1=fcn_layer(inputs=x,input_dim=X_NN,output_dim=H1_NN,activation=tf.nn.relu)

# Hidden layer 2
h2=fcn_layer(inputs=h1,input_dim=H1_NN,output_dim=H2_NN,activation=tf.nn.relu)

# Output layer
forward=fcn_layer(inputs=h2,input_dim=H2_NN,output_dim=Y_dim,activation=None)
pred=tf.nn.softmax(forward)

# Iterative training
## Define the label placeholder
y=tf.placeholder(tf.float32,[None,Y_dim],name="Y")

## Loss function
loss_function=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=forward,labels=y))

## Training hyperparameters
train_epochs=40
batch_size=50
total_batch=int(mnist.train.num_examples/batch_size)# batches per epoch (uses the mnist helper loaded above)
display_step=1
learning_rate=0.01

## Choose the optimizer
optimizer=tf.train.AdamOptimizer(learning_rate).minimize(loss_function)

## Define the evaluation metric
correct_prediction=tf.equal(tf.argmax(y,1),tf.argmax(pred,1))
accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

## Session
StartTime=time()

sess=tf.Session()
sess.run(tf.global_variables_initializer())

step=0
loss_list=[]
for epoch in range(train_epochs):
	for batch in range(total_batch):
		xs,ys=mnist.train.next_batch(batch_size)# fetch one mini-batch (assumes the MNIST helper loaded above)
		_,loss,acc=sess.run([optimizer,loss_function,accuracy],feed_dict={x:xs,y:ys})
		loss_list.append(loss)
		step+=1
		if step % display_step ==0:
			print(epoch+1,step,loss,acc)
duration=time()-StartTime
print("Train Finished takes:",duration)

Basic CNN framework

# Import libraries
%matplotlib notebook
import tensorflow as tf

# Define shared helper functions
def weight(shape):
	return tf.Variable(tf.truncated_normal(shape,stddev=0.1),name='weights')

def bias(shape):
	return tf.Variable(tf.constant(0.1,shape=shape),name='bias')

def conv2d(x, W):
	# stride-1 'SAME' convolution: output keeps the input's spatial size
	return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
	# 2x2 max pooling with stride 2: halves height and width
	return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Define the network structure
## Input layer
with tf.name_scope('input_layer'):
	x=tf.placeholder('float',shape=[None,32,32,3],name='X')# 32x32 RGB images

## Convolution 1
with tf.name_scope('conv_1'):
	W1=weight([3,3,3,32])# [k_width,k_height,input_channels,output_channels]
	b1=bias([32])# matches output_channels
	conv_1=conv2d(x,W1)+b1
	conv_1=tf.nn.relu(conv_1)

## Pooling 1
with tf.name_scope('pool_1'):
	pool_1=max_pool_2x2(conv_1)

## Convolution 2
with tf.name_scope('conv_2'):
	W2=weight([3,3,32,64])# [k_width,k_height,input_channels,output_channels]
	b2=bias([64])# matches output_channels
	conv_2=conv2d(pool_1,W2)+b2# input is pool_1, not x
	conv_2=tf.nn.relu(conv_2)

## Pooling 2
with tf.name_scope('pool_2'):
	pool_2=max_pool_2x2(conv_2)

## Fully connected layer
with tf.name_scope('fc'):
	W3=weight([4096,128])
	b3=bias([128])
	flat=tf.reshape(pool_2,[-1,4096])
	h=tf.nn.relu(tf.matmul(flat,W3)+b3)
	h_dropout=tf.nn.dropout(h,keep_prob=0.8)
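
The 4096 in the reshape is not arbitrary: with 'SAME' padding and stride 1 each convolution preserves the 32x32 spatial size, each 2x2 pool halves it, so pool_2 is 8x8 with 64 channels and 8*8*64 = 4096. A quick sanity check:

print(pool_2.shape)  # expect (?, 8, 8, 64)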

## Output layer
with tf.name_scope('output_layer'):
	W4=weight([128,10])
	b4=bias([10])
	forward=tf.matmul(h_dropout,W4)+b4# keep the raw logits for the loss
	pred=tf.nn.softmax(forward)

# Build the model
## Define the optimizer
with tf.name_scope('optimizer'):
	y=tf.placeholder('float',shape=[None,10],name='label')
	loss_function=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=forward,labels=y))# pass logits, not the softmax output
	optimizer=tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss_function)

## Define accuracy
with tf.name_scope('evaluation'):
	correct_prediction=tf.equal(tf.argmax(y,1),tf.argmax(pred,1))
	accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

# Train the model
# See the basic NN training framework above
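
Only the feed shapes change relative to the basic framework. A minimal training step, assuming xs/ys batches come from your own data loader:

sess = tf.Session()
sess.run(tf.global_variables_initializer())
# xs: (batch, 32, 32, 3) float images, ys: (batch, 10) one-hot labels from your own loader
_, loss, acc = sess.run([optimizer, loss_function, accuracy], feed_dict={x: xs, y: ys})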

Checkpointing and resuming training

# Checkpoint frequency (save every save_step epochs)
save_step=5

# Create the directory for saved model files
import os
ckpt_dir="D:/Python_AI/断点续训/train_name/"
if not os.path.exists(ckpt_dir):
	os.makedirs(ckpt_dir)

# Create the saver object
saver = tf.train.Saver()
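
By default tf.train.Saver keeps only the 5 most recent checkpoint files; pass max_to_keep when creating the saver to keep more:

saver = tf.train.Saver(max_to_keep=10)  # keep the 10 most recent checkpoints (None keeps all)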

# Save checkpoints: add the following inside the Session training loop
	if (epoch+1)% save_step==0:
		saver.save(sess,os.path.join(ckpt_dir,'train_name_model_{:06d}.ckpt'.format(epoch+1)))
		print('train_name_model_{:06d}.ckpt saved'.format(epoch+1))
saver.save(sess,os.path.join(ckpt_dir,'train_name_model.ckpt'))
print('model saved')

# Resume from a checkpoint: add the following to the Session section before training
ckpt=tf.train.get_checkpoint_state(ckpt_dir)
if ckpt and ckpt.model_checkpoint_path:
	saver.restore(sess,ckpt.model_checkpoint_path)
	print('restore model from ',ckpt.model_checkpoint_path)

Convolutional Neural Network (CNN)

from keras.layers import Conv2D,MaxPooling2D,Dropout,Flatten,Dense
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.callbacks import ReduceLROnPlateau

# Network structure ((conv->pool->dropout)*2 -> Flatten -> Dense -> dropout -> Dense(out))
# Layer hyperparameters below are illustrative values for 28x28 grayscale (MNIST-style) input
model=Sequential()
model.add(Conv2D(filters=32,kernel_size=(3,3),padding='same',activation='relu',input_shape=(28,28,1)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Conv2D(filters=64,kernel_size=(3,3),padding='same',activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10,activation='softmax'))# 10 classes; softmax matches categorical_crossentropy
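
model.summary() prints layer output shapes and parameter counts, a quick way to verify the structure above:

model.summary()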

# Define the optimizer
optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
# Compile the model
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

# Set a learning rate annealer
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)
epochs = 1 # Turn epochs to 30 to get 0.9967 accuracy
batch_size = 86
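
fit_generator below consumes a datagen augmentation generator that this excerpt never defines; a minimal sketch (the augmentation parameters here are illustrative):

from keras.preprocessing.image import ImageDataGenerator

# Assumed definition of datagen (not shown in the excerpt); parameters are illustrative
datagen = ImageDataGenerator(rotation_range=10,
                             zoom_range=0.1,
                             width_shift_range=0.1,
                             height_shift_range=0.1)
datagen.fit(X_train)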

# Fit the model
history = model.fit_generator(datagen.flow(X_train,Y_train, batch_size=batch_size),
                              epochs = epochs, validation_data = (X_val,Y_val),
                              verbose = 2, steps_per_epoch=X_train.shape[0] // batch_size
                              , callbacks=[learning_rate_reduction])

# Plot the loss and accuracy curves for training and validation 
fig, ax = plt.subplots(2,1)
ax[0].plot(history.history['loss'], color='b', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(history.history['acc'], color='b', label="Training accuracy")
ax[1].plot(history.history['val_acc'], color='r',label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)


# Display some error results 

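Y_pred, Y_pred_classes and Y_true are not defined in this excerpt; the usual construction from the validation split would be:

# Assumed definitions (not shown in the excerpt)
Y_pred = model.predict(X_val)               # class probabilities, shape (n, 10)
Y_pred_classes = np.argmax(Y_pred, axis=1)  # predicted labels
Y_true = np.argmax(Y_val, axis=1)           # true labels recovered from one-hot Y_val
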
# Errors are where predicted labels differ from true labels
errors = (Y_pred_classes - Y_true != 0)

Y_pred_classes_errors = Y_pred_classes[errors]
Y_pred_errors = Y_pred[errors]
Y_true_errors = Y_true[errors]
X_val_errors = X_val[errors]

def display_errors(errors_index,img_errors,pred_errors, obs_errors):
    """ This function shows 6 images with their predicted and real labels"""
    n = 0
    nrows = 2
    ncols = 3
    fig, ax = plt.subplots(nrows,ncols,sharex=True,sharey=True)
    for row in range(nrows):
        for col in range(ncols):
            error = errors_index[n]
            ax[row,col].imshow((img_errors[error]).reshape((28,28)))
            ax[row,col].set_title("Predicted label :{}\nTrue label :{}".format(pred_errors[error],obs_errors[error]))
            n += 1

# Probabilities of the wrong predicted numbers
Y_pred_errors_prob = np.max(Y_pred_errors,axis = 1)

# Predicted probabilities of the true values in the error set
true_prob_errors = np.diagonal(np.take(Y_pred_errors, Y_true_errors, axis=1))

# Difference between the probability of the predicted label and the true label
delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors

# Sorted list of the delta prob errors
sorted_delta_errors = np.argsort(delta_pred_true_errors)

# Top 6 errors 
most_important_errors = sorted_delta_errors[-6:]

# Show the top 6 errors
display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors)

# predict results (test is the unlabeled test-image array; its loading is not shown here)
results = model.predict(test)

# select the index with the maximum probability
results = np.argmax(results,axis = 1)

results = pd.Series(results,name="Label")
submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),results],axis = 1)

submission.to_csv("C:/Users/lihao/Desktop/cnn_mnist_datagen.csv",index=False)

Reposted from blog.csdn.net/weixin_42297855/article/details/102656597