牛逼的初始化xavier(tensorflow)

牛逼的初始化(必学)
初始化:有人用正态分布(normal)初始化 CNN 的参数,最终准确率(acc)只能到 70% 多;仅仅把初始化方式改成 xavier,准确率就可以达到 98%。
1.修改初始化过程
导入

from tensorflow.contrib.layers import xavier_initializer

原来的

# Baseline: initial weights are drawn from a truncated normal with stddev=0.01.
initial_weights = tf.truncated_normal([n_hiddens, n_classes], stddev=0.01)
Weights = tf.Variable(initial_weights, dtype=tf.float32, name='W')
   

改成xavier(注意:xavier_initializer() 默认 uniform=True,即从均匀分布采样;传入 uniform=False 才是高斯/正态分布)

# Same variable name and shape as the baseline, but the initial values come
# from xavier_initializer() instead of a fixed-stddev truncated normal.
Weights = tf.get_variable(
    name='W',
    shape=[n_hiddens, n_classes],
    dtype=tf.float32,
    initializer=xavier_initializer(),
)

2.然后初始化

# Initialization: open a session and run the op that initializes all
# global variables (this is when the chosen initializer is actually applied).
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

3.完整代码
转自:代码地址

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# created by fhqplzj on 2017/07/07 下午3:22
import random
 
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer
from tensorflow.examples.tutorials.mnist import input_data
 
# Training hyperparameters (plain Python values, no graph ops involved).
learning_rate = 0.001
training_epochs = 15
batch_size = 100

# Load MNIST from the author's local directory; one_hot=True makes the labels
# 10-element one-hot vectors, matching the Y placeholder below.
mnist = input_data.read_data_sets(
    '/Users/fhqplzj/PycharmProjects/tensorflow_examples/tutorials/rnn/translate',
    one_hot=True,
)

# Fix the graph-level random seed. The order of the TensorFlow calls in this
# script is kept as-is on purpose, since seeding may depend on it.
tf.set_random_seed(777)

# Graph inputs: flattened images (784 values, reshaped to 28x28 when plotted),
# one-hot labels over 10 classes, and the dropout keep probability.
X = tf.placeholder(dtype=tf.float32, shape=[None, 784])
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10])
keep_prob = tf.placeholder(dtype=tf.float32)
 
def _relu_dropout_layer(inputs, weight_name, n_in, n_out):
    """Build one hidden layer: dropout(relu(inputs x W + b)).

    The weight matrix is created via ``tf.get_variable`` under ``weight_name``
    with ``xavier_initializer()``; the bias is a ``tf.Variable`` initialized
    from ``tf.random_normal``. Dropout uses the module-level ``keep_prob``
    placeholder.

    Returns:
        A ``(weights, bias, output)`` tuple so the caller can keep a handle
        on every tensor the layer creates.
    """
    weights = tf.get_variable(weight_name, [n_in, n_out], tf.float32, xavier_initializer())
    bias = tf.Variable(tf.random_normal([n_out]))
    activated = tf.nn.relu(tf.add(tf.matmul(inputs, weights), bias))
    return weights, bias, tf.nn.dropout(activated, keep_prob=keep_prob)


# Four identical 512-unit hidden layers, built in the same order as before so
# variable names and op creation order are unchanged.
W1, b1, L1 = _relu_dropout_layer(X, 'W1', 784, 512)
W2, b2, L2 = _relu_dropout_layer(L1, 'W2', 512, 512)
W3, b3, L3 = _relu_dropout_layer(L2, 'W3', 512, 512)
W4, b4, L4 = _relu_dropout_layer(L3, 'W4', 512, 512)

# Output layer: raw logits for the 10 classes (no activation, no dropout);
# softmax is applied inside the loss.
W5 = tf.get_variable(
    name='W5',
    shape=[512, 10],
    dtype=tf.float32,
    initializer=xavier_initializer(),
)
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.add(tf.matmul(L4, W5), b5)
 
# Loss: softmax cross-entropy per example, averaged over the batch.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=hypothesis)
cost = tf.reduce_mean(per_example_loss)

# Despite its name, `optimizer` is the training op returned by minimize().
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
optimizer = adam.minimize(cost)

# Accuracy: fraction of examples whose arg-max logit matches the arg-max of
# the one-hot label.
true_class = tf.argmax(Y, 1)
predicted_class = tf.argmax(hypothesis, 1)
correct_prediction = tf.equal(true_class, predicted_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 
# Train the network, report test accuracy, then show one random test digit
# together with its label and the model's prediction.
#
# All output goes through single-argument print(...) calls, which emit the
# same text on Python 2 and Python 3 (the original print statements are a
# SyntaxError on Python 3).
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Mini-batches per epoch; does not change between epochs, so compute once.
    total_batch = mnist.train.num_examples // batch_size

    for epoch in range(training_epochs):
        avg_cost = 0
        for _ in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # One optimization step; dropout keeps 70% of activations.
            c, _ = sess.run([cost, optimizer], feed_dict={
                X: batch_xs,
                Y: batch_ys,
                keep_prob: 0.7
            })
            # Running mean of the batch costs over this epoch.
            avg_cost += c / total_batch
        print('Epoch: {:04d} cost = {:.9f}'.format(epoch + 1, avg_cost))
    print('Learning Finished!')

    # Evaluate on the full test set with dropout disabled (keep_prob = 1).
    test_accuracy = sess.run(accuracy, feed_dict={
        X: mnist.test.images,
        Y: mnist.test.labels,
        keep_prob: 1
    })
    print('Accuracy: {}'.format(test_accuracy))

    # Pick one random test example and compare label vs. prediction.
    r = random.randint(0, mnist.test.num_examples - 1)
    label = sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1))
    # The double space after the colon matches the original output exactly.
    print('Label:  {}'.format(label))
    prediction = sess.run(tf.argmax(hypothesis, 1), feed_dict={
        X: mnist.test.images[r:r + 1],
        keep_prob: 1
    })
    print('Prediction:  {}'.format(prediction))

    # Display the chosen digit as a 28x28 grayscale image.
    plt.imshow(mnist.test.images[r:r + 1].reshape(28, 28), cmap='Greys', interpolation='nearest')
    plt.show()

猜你喜欢

转载自blog.csdn.net/fan15945028042/article/details/89162257