MNIST classification with TensorFlow (my1stNN)

This comes from the first course of the HSE series, Introduction to Advanced Machine Learning.
It is the second programming assignment of week 2; difficulty: medium.
The task is to classify images from the MNIST dataset with TensorFlow, which is a multi-class classification problem.
This note splits the task into three parts:
1. A binary classification problem.
2. A multi-class classification problem using softmax regression, without a hidden layer.
3. A multi-class classification problem using softmax regression, with a hidden layer.

from preprocessed_mnist import load_dataset
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
print(X_train.shape, y_train.shape)
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(X_train[1], cmap="Greys");
(50000, 28, 28) (50000,)


import tensorflow as tf
s = tf.InteractiveSession()
import numpy as np

1. Binary classification

  1. Flatten each 28×28 image into a one-dimensional vector; the flattened arrays carry a '_flatten' suffix.
  2. Map every non-'1' label in the training, validation and test sets to '0', which turns the task into a binary classification problem.
  3. The test set accuracy reaches about 99.2%.
# logistic regression -- binary
# prepare the data
m_train = y_train.shape[0]
m_val = y_val.shape[0]
m_test = y_test.shape[0]
n_x = X_train.shape[1]*X_train.shape[2] # number of input features (28*28)

y_train_1 = np.zeros([m_train,]) # binary labels: 1 if the digit is '1', else 0
y_val_1 = np.zeros([m_val,])
y_test_1 = np.zeros([m_test,])
predicted_y_test = np.zeros([m_test,])
X_train_flatten = X_train.reshape(m_train,n_x)
X_test_flatten = X_test.reshape(m_test,n_x)

for i in range(m_train):
    if(y_train[i] == 1):
        y_train_1[i] = 1
    else:
        y_train_1[i] = 0
for i in range(m_val):
    if(y_val[i] == 1):
        y_val_1[i] = 1
    else:
        y_val_1[i] = 0
for i in range(m_test):
    if(y_test[i] == 1):
        y_test_1[i] = 1
    else:
        y_test_1[i] = 0
#print(m_train)
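The three loops can also be written as a single vectorized comparison each; here is a minimal NumPy sketch of the same binarization (using the same arrays as above):

# vectorized equivalent of the loops above: label is 1 iff the digit is '1'
y_train_1 = (y_train == 1).astype(np.float32)
y_val_1 = (y_val == 1).astype(np.float32)
y_test_1 = (y_test == 1).astype(np.float32)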

Create a set of parameters. Since this is a binary classification problem, the output layer has a single unit, so W1 has shape n_x × 1.

# binary classification
W1 = tf.Variable(tf.random_normal(shape = [n_x,1],mean = 0, stddev = 0.01))
b1 = tf.Variable(0.0)


input_X = tf.placeholder("float32", shape=(None,None), name="input_X")
input_y = tf.placeholder("float32", shape=(None,), name="input_y")

predicted_y = tf.sigmoid(tf.matmul(input_X,W1) + b1)
predicted_y = tf.squeeze(predicted_y)

loss = -tf.reduce_mean(input_y * tf.log(predicted_y) + (1-input_y)* tf.log(1-predicted_y))
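# Note (added, not in the original post): tf.log(predicted_y) can return NaN once the
# sigmoid saturates to exactly 0 or 1; a numerically safer formulation keeps the
# pre-sigmoid value z = tf.matmul(input_X, W1) + b1 and uses
#   tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=input_y, logits=z))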
print(loss.shape)
#optimizer = tf.train.MomentumOptimizer(0.01, 0.5).minimize(loss)
optimizer = tf.train.AdamOptimizer(learning_rate = 0.01,beta1=0.9,beta2=0.999,epsilon=1e-08,).minimize(loss)
s.run(tf.global_variables_initializer())
for i in range(50):
    s.run(optimizer, {input_X: X_train_flatten, input_y: y_train_1})
    loss_i = s.run(loss, {input_X: X_train_flatten, input_y: y_train_1})
    #print("loss at iter %i:%.4f" % (i, loss_i))
    if i%9 == 0:
        predicted_y_test = s.run(predicted_y, {input_X: X_test_flatten})
        for j in range(m_test):
            if predicted_y_test[j] > 0.5:
                predicted_y_test[j] = 1
            else:
                predicted_y_test[j] = 0
        n_correct = sum(predicted_y_test == y_test_1)
        #print(n_correct/m_test)
<unknown>

2. Multi-class classification without a hidden layer

Without a hidden layer, softmax regression reaches about 92% accuracy on the test set; a sketch of this variant is given right below.
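The original post does not include code for this variant, so what follows is only a minimal sketch of the single-layer softmax model. It assumes the flattened inputs (X_train_flatten), the one-hot labels (y_train_onehot), n_x and the session s that are prepared in the code of part 3 below.

# softmax regression without a hidden layer -- illustrative sketch only
W = tf.Variable(tf.random_normal(shape=[n_x, 10], mean=0, stddev=0.01))
b = tf.Variable(tf.zeros([10]))

input_X = tf.placeholder("float32", shape=(None, n_x), name="input_X")
input_y = tf.placeholder("float32", shape=(None, 10), name="input_y")

logits = tf.matmul(input_X, W) + b
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=input_y, logits=logits))
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)

s.run(tf.global_variables_initializer())
for i in range(300):
    s.run(optimizer, {input_X: X_train_flatten, input_y: y_train_onehot})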

3. Multi-class classification with a hidden layer

With one hidden layer added and a softmax output, the test set reaches around 97% accuracy.

# logistic regression -- multi-class
# prepare the data
m_train = y_train.shape[0]
m_val = y_val.shape[0]
m_test = y_test.shape[0]
n_x = X_train.shape[1]*X_train.shape[2] # number of input features (28*28)

y_train_onehot = np.zeros([m_train,10]) # one-hot encoded labels, 10 classes
y_val_onehot = np.zeros([m_val,10])
y_test_onehot = np.zeros([m_test,10])
predicted_y_test = np.zeros([m_test,10])
X_train_flatten = X_train.reshape(m_train,n_x)
X_test_flatten = X_test.reshape(m_test,n_x)
X_val_flatten = X_val.reshape(m_val,n_x)

#print(m_train)
# multi-class classification
one_hot_matrix = tf.one_hot(indices =y_train,depth = 10,axis=1)
sess = tf.Session()
y_train_onehot = sess.run(one_hot_matrix)

one_hot_matrix = tf.one_hot(indices =y_val,depth = 10,axis=1)
sess = tf.Session()
y_val_onehot = sess.run(one_hot_matrix)

one_hot_matrix = tf.one_hot(indices =y_test,depth = 10,axis=1)
sess = tf.Session()
y_test_onehot = sess.run(one_hot_matrix)
print(y_test_onehot)
#y_train_onehot = tf.one_hot(indices = y_train,depth = ,axis = 1)
#y_val_onehot = tf.one_hot(indices = y_val,depth = 10,axis = 1)
#y_test_onehot = tf.one_hot(indices = y_test,depth = 10,axis = 1)
#s.run(y_train_onehot)
#s.run(y_val_onehot)
#s.run(y_test_onehot)
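# Note (added, not in the original post): assuming the labels are integer arrays,
# the same one-hot encoding could be done in plain NumPy, without creating extra
# Session objects, e.g.
#   y_train_onehot = np.eye(10)[y_train]
#   y_val_onehot = np.eye(10)[y_val]
#   y_test_onehot = np.eye(10)[y_test]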


W1 = tf.Variable(tf.random_normal(shape = [n_x,50],mean = 0, stddev = 0.01))
b1 = tf.Variable(tf.random_normal(shape = [50],mean = 0, stddev = 0.01))
W2 = tf.Variable(tf.random_normal(shape = [50,10],mean = 0, stddev = 0.01))
b2 = tf.Variable(tf.random_normal(shape = [10],mean = 0, stddev = 0.01))


input_X = tf.placeholder("float32", shape=(None,None), name="input_X")
input_y = tf.placeholder("float32", shape=(None,None), name="input_y")

hidden_y = tf.nn.relu(tf.matmul(input_X,W1) + b1)
predicted_y = tf.nn.softmax(tf.matmul(hidden_y,W2) + b2)
#predicted_y = tf.squeeze(predicted_y)

loss = -tf.reduce_mean(input_y * tf.log(predicted_y + 1e-10) + (1-input_y)* tf.log(1-predicted_y + 1e-10))
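# Note (added, not in the original post): the loss above treats each of the 10
# softmax outputs as an independent binary target; the usual categorical
# cross-entropy over the softmax would be
#   loss = -tf.reduce_mean(tf.reduce_sum(input_y * tf.log(predicted_y + 1e-10), axis=1))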
print(loss.shape)
#optimizer = tf.train.MomentumOptimizer(0.01, 0.5).minimize(loss)
optimizer = tf.train.AdamOptimizer(learning_rate = 0.01,beta1=0.9,beta2=0.999,epsilon=1e-08,).minimize(loss)
s.run(tf.global_variables_initializer())
for i in range(300):
    s.run(optimizer, {input_X: X_train_flatten, input_y: y_train_onehot})
    loss_i = s.run(loss, {input_X: X_train_flatten, input_y: y_train_onehot})
    print("loss at iter %i:%.4f" % (i, loss_i))
    if i%10 == 0:
        correct_prediction = tf.equal(tf.argmax(predicted_y, 1), tf.argmax(y_test_onehot, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        accuracy_result = s.run(accuracy, feed_dict={input_X: X_test_flatten})
        print(accuracy_result)
        correct_prediction = tf.equal(tf.argmax(predicted_y, 1), tf.argmax(y_val_onehot, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        accuracy_result = s.run(accuracy, feed_dict={input_X: X_val_flatten})
        print(accuracy_result)
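# Note (added, not in the original post): the tf.equal / tf.reduce_mean nodes are
# rebuilt inside the loop, so new ops are added to the graph every 10 iterations;
# defining the two accuracy tensors once before the loop keeps the graph size fixed.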


Reposted from blog.csdn.net/s09094031/article/details/80149476