Hands-on MNIST with TensorFlow (my1stNN)

This assignment is from the first course, Introduction to Deep Learning, in HSE's Advanced Machine Learning series.
It is the second programming assignment of week 2; difficulty level: medium.
Classifying MNIST images with TensorFlow is a multi-class classification problem.
This note tackles the task in three parts:
1. Implement a two-class classification problem.
2. Implement multi-class classification with softmax regression and no hidden layer.
3. Implement multi-class classification with a softmax output and one hidden layer.

from preprocessed_mnist import load_dataset
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
print(X_train.shape, y_train.shape)
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(X_train[1], cmap="Greys");
(50000, 28, 28) (50000,)

[Image: the MNIST digit at X_train[1], rendered with plt.imshow]

import tensorflow as tf
s = tf.InteractiveSession()
import numpy as np

1. Two-class classification problem

  1. The 28×28 two-dimensional images are flattened to one dimension; the flattened arrays get the suffix '_flatten'.
  2. In the training, validation and test sets, every label other than '1' is changed to '0', which turns the task into binary classification (see the vectorized sketch below).
  3. The test set reaches about 99.2% accuracy.
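The binarization in step 2 is written with explicit loops in the code below; as a minimal vectorized sketch (assuming NumPy is imported as np and the arrays come from load_dataset), the same labels can be produced in one line each:

# vectorized equivalent of the binarization loops below
y_train_1 = (y_train == 1).astype(np.float32)  # 1 where the digit is '1', else 0
y_val_1 = (y_val == 1).astype(np.float32)
y_test_1 = (y_test == 1).astype(np.float32)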
# logistic regression -- binary
# prepare the data
m_train = y_train.shape[0]
m_val = y_val.shape[0]
m_test = y_test.shape[0]
n_x = X_train.shape[1]*X_train.shape[2] # number of input features (28*28 = 784)

y_train_1 = np.zeros([m_train,]) # binary labels: 1 where the digit is '1', else 0
y_val_1 = np.zeros([m_val,])
y_test_1 = np.zeros([m_test,])
predicted_y_test = np.zeros([m_test,])
X_train_flatten = X_train.reshape(m_train,n_x)
X_test_flatten = X_test.reshape(m_test,n_x)

for i in range(m_train):
    if(y_train[i] == 1):
        y_train_1[i] = 1
    else:
        y_train_1[i] = 0
for i in range(m_val):
    if(y_val[i] == 1):
        y_val_1[i] = 1
    else:
        y_val_1[i] = 0
for i in range(m_test):
    if(y_test[i] == 1):
        y_test_1[i] = 1
    else:
        y_test_1[i] = 0
#print(m_train)

Next, define the parameters. Since this is binary classification, the output layer has a single unit, so W1 has dimension n_x × 1.
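In formulas, the model below computes a sigmoid output and minimizes the binary cross-entropy:

$$\hat{y} = \sigma(X W_1 + b_1), \qquad L = -\frac{1}{m}\sum_{i=1}^{m}\big[y_i \log \hat{y}_i + (1 - y_i)\log(1 - \hat{y}_i)\big]$$

This is exactly the loss implemented in the code below.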

# binary classification
W1 = tf.Variable(tf.random_normal(shape = [n_x,1],mean = 0, stddev = 0.01))
b1 = tf.Variable(0.0)


input_X = tf.placeholder("float32", shape=(None,None), name="input_X")
input_y = tf.placeholder("float32", shape=(None,), name="input_y")

predicted_y = tf.sigmoid(tf.matmul(input_X,W1) + b1)
predicted_y = tf.squeeze(predicted_y)

# binary cross-entropy; the 1e-10 guards against log(0)
loss = -tf.reduce_mean(input_y * tf.log(predicted_y + 1e-10) + (1 - input_y) * tf.log(1 - predicted_y + 1e-10))
print(loss.shape)
#optimizer = tf.train.MomentumOptimizer(0.01, 0.5).minimize(loss)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-08).minimize(loss)
s.run(tf.global_variables_initializer())
for i in range(50):
    s.run(optimizer, {input_X: X_train_flatten, input_y: y_train_1})
    loss_i = s.run(loss, {input_X: X_train_flatten, input_y: y_train_1})
    #print("loss at iter %i:%.4f" % (i, loss_i))
    if i%9 == 0:
        predicted_y_test = s.run(predicted_y, {input_X: X_test_flatten})
        predicted_y_test = (predicted_y_test > 0.5).astype(np.float32)  # threshold the sigmoid output
        n_correct = np.sum(predicted_y_test == y_test_1)
        print("test accuracy at iter %i: %.4f" % (i, n_correct / m_test))

2. Multi-class classification without a hidden layer

With no hidden layer, plain softmax regression reaches about 92% accuracy on the test set; a minimal sketch of that model follows.
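The original listing only includes the part-3 code, so here is a sketch of the part-2 model. It assumes the flattened inputs (X_train_flatten) and one-hot labels (y_train_onehot) prepared in the next code block; the placeholder names X_ph and y_ph are illustrative, and it uses TensorFlow's built-in cross-entropy rather than a hand-written loss:

# part 2 (sketch): softmax regression, a single affine layer + softmax
W = tf.Variable(tf.random_normal(shape=[n_x, 10], mean=0, stddev=0.01))
b = tf.Variable(tf.zeros([10]))

X_ph = tf.placeholder("float32", shape=(None, n_x))
y_ph = tf.placeholder("float32", shape=(None, 10))

logits = tf.matmul(X_ph, W) + b
# cross-entropy computed from the logits for numerical stability
xent = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_ph, logits=logits))
train_step = tf.train.AdamOptimizer(0.01).minimize(xent)

s.run(tf.global_variables_initializer())
for i in range(300):
    s.run(train_step, {X_ph: X_train_flatten, y_ph: y_train_onehot})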

3. Multi-class classification with a hidden layer

Adding one hidden layer in front of the softmax output raises test accuracy to about 97%.
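In symbols, the part-3 network is

$$h = \mathrm{ReLU}(X W_1 + b_1), \qquad \hat{y} = \mathrm{softmax}(h W_2 + b_2)$$

with a hidden layer of 50 units, matching the shapes of W1 and W2 in the code below.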

# logistic regression -- multi-class
# prepare the data
m_train = y_train.shape[0]
m_val = y_val.shape[0]
m_test = y_test.shape[0]
n_x = X_train.shape[1]*X_train.shape[2] # number of input features

y_train_onehot = np.zeros([m_train,10]) # placeholders for the one-hot encoded labels
y_val_onehot = np.zeros([m_val,10])
y_test_onehot = np.zeros([m_test,10])
predicted_y_test = np.zeros([m_test,10])
X_train_flatten = X_train.reshape(m_train,n_x)
X_test_flatten = X_test.reshape(m_test,n_x)
X_val_flatten = X_val.reshape(m_val,n_x)

#print(m_train)
# multi-class classification: one-hot encode the labels with tf.one_hot,
# reusing the interactive session s instead of opening a new tf.Session each time
y_train_onehot = s.run(tf.one_hot(indices=y_train, depth=10, axis=1))
y_val_onehot = s.run(tf.one_hot(indices=y_val, depth=10, axis=1))
y_test_onehot = s.run(tf.one_hot(indices=y_test, depth=10, axis=1))
print(y_test_onehot)


W1 = tf.Variable(tf.random_normal(shape = [n_x,50],mean = 0, stddev = 0.01))
b1 = tf.Variable(tf.random_normal(shape = [50],mean = 0, stddev = 0.01))
W2 = tf.Variable(tf.random_normal(shape = [50,10],mean = 0, stddev = 0.01))
b2 = tf.Variable(tf.random_normal(shape = [10],mean = 0, stddev = 0.01))


input_X = tf.placeholder("float32", shape=(None,None), name="input_X")
input_y = tf.placeholder("float32", shape=(None,None), name="input_y")

hidden_y = tf.nn.relu(tf.matmul(input_X,W1) + b1)
predicted_y = tf.nn.softmax(tf.matmul(hidden_y,W2) + b2)
#predicted_y = tf.squeeze(predicted_y)

# element-wise binary cross-entropy over the 10 one-hot outputs; 1e-10 guards against log(0)
loss = -tf.reduce_mean(input_y * tf.log(predicted_y + 1e-10) + (1 - input_y) * tf.log(1 - predicted_y + 1e-10))
print(loss.shape)
#optimizer = tf.train.MomentumOptimizer(0.01, 0.5).minimize(loss)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-08).minimize(loss)
s.run(tf.global_variables_initializer())
for i in range(300):
    s.run(optimizer, {input_X: X_train_flatten, input_y: y_train_onehot})
    loss_i = s.run(loss, {input_X: X_train_flatten, input_y: y_train_onehot})
    print("loss at iter %i:%.4f" % (i, loss_i))
    if i%10 == 0:
        correct_prediction = tf.equal(tf.argmax(predicted_y, 1), tf.argmax(y_test_onehot, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        accuracy_result = s.run(accuracy, feed_dict={input_X: X_test_flatten})
        print(accuracy_result)
        correct_prediction = tf.equal(tf.argmax(predicted_y, 1), tf.argmax(y_val_onehot, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        accuracy_result = s.run(accuracy, feed_dict={input_X: X_val_flatten})
        print(accuracy_result)
