TensorFlow - 批量处理数据

flyfish

import numpy as np
import pandas as pd
import tensorflow as tf


# Synthetic data set: `dataset_size` samples with `feature_size` features
# each, filled by np.random.rand (uniform samples from [0, 1)).
dataset_size = 255
feature_size = 50

X = np.random.rand(dataset_size, feature_size)
print(X.shape)

# One target value per sample. np.random.rand already returns a
# (dataset_size, 1) column here, so no reshape is required.
Y = np.random.rand(dataset_size, 1)
print(Y.shape)


# Number of samples fed to the optimizer in one training step.
batch_size = 100

# Trainable parameters of a single linear unit: a (feature_size, 1) weight
# matrix initialised from a seeded normal distribution, and a scalar bias.
w = tf.Variable(tf.random_normal(shape=[feature_size, 1], stddev=1, seed=1))
b = tf.Variable(initial_value=0.0, name="biases", dtype=tf.float32)

# Input placeholders; the leading None lets a batch of any size be fed.
x = tf.placeholder(dtype=tf.float32, shape=(None, feature_size), name='x-input')
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='y-input')

# Forward pass: y = x . w + b
weighted_sum = tf.matmul(x, w)
y = tf.add(weighted_sum, b)

# Loss and back-propagation. y is clipped into [1e-10, 1.0] before the log
# so that tf.log never receives a non-positive value.
# NOTE(review): y is an unbounded linear output, not a probability, and the
# clip passes no gradient for values outside its range - confirm that this
# cross-entropy form is really the intended objective for these targets.
clipped_y = tf.clip_by_value(y, 1e-10, 1.0)
cross_entropy = -tf.reduce_mean(y_ * tf.log(clipped_y))
adam = tf.train.AdamOptimizer(learning_rate=0.001)
train_step = adam.minimize(cross_entropy)

# Alternative kept disabled by the author: mean squared error minimised
# with plain gradient descent.
# cross_entropy = tf.reduce_mean(tf.square(y - y_))
# optimizer = tf.train.GradientDescentOptimizer(0.0000001)
# train_step = optimizer.minimize(cross_entropy)

# Train the model inside one session, walking through the data set in
# consecutive windows of batch_size rows.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    print('训练前网络参数的值为：')
    print(sess.run(w))
    print(sess.run(b))

    # Total number of optimisation steps.
    STEPS = 5000
    for i in range(STEPS):
        # The start offset wraps around through the modulo; the window is
        # cut short when it would run past the end of the data set, so a
        # batch can hold fewer than batch_size rows.
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)
        batch = slice(start, end)

        # One parameter update on the selected rows.
        sess.run(train_step, feed_dict={x: X[batch], y_: Y[batch]})

        if i % 1000 != 0:
            continue

        # Every 1000 steps, report the loss evaluated on the full data set.
        total_cross_entropy = sess.run(cross_entropy, feed_dict={x: X, y_: Y})
        print("After {} training step(s),cross entropy on all data is {}".
              format(i, total_cross_entropy))

    print('训练后网络参数的值为：')
    print(sess.run(w))
    print(sess.run(b))

批量随机选择数据进行训练

X  #特征
Y  #标签

X_pred  #要预测的数据
model_path  #模型保存路径
output_file_path  #预测结果保存路径

def _l2_normalize_columns(values, epsilon=1e-12):
    """Scale every column of ``values`` to unit L2 norm.

    Computes ``values / sqrt(max(sum(values ** 2, axis=0), epsilon))``, the
    same formula ``tf.nn.l2_normalize`` documents for normalising along
    axis 0. Doing it in NumPy keeps the data out of the TensorFlow graph:
    an array passed to a TF op becomes a graph constant, which a later
    ``tf.train.Saver.save`` would write into the checkpoint's meta graph.

    Args:
        values: 2-D array-like of numbers.
        epsilon: lower bound for the squared norm, so that an all-zero
            column yields zeros instead of a division by zero.

    Returns:
        A float64 ndarray with the same shape as ``values``.
    """
    values = np.asarray(values, dtype=np.float64)
    squared_norm = np.sum(np.square(values), axis=0, keepdims=True)
    return values / np.sqrt(np.maximum(squared_norm, epsilon))


# Problem dimensions come from the feature matrix X, which is supplied from
# outside this listing together with the label array Y.
dataset_size, feature_size = X.shape

# Replace missing values with 0.0 before training.
dfX = pd.DataFrame(X)
dfX.fillna(0.0, inplace=True)
X = np.array(dfX.values)

dfY = pd.DataFrame(Y)
dfY.fillna(0.0, inplace=True)
# Labels are kept as a single column, matching the (None, 1) placeholder.
Y = np.array(dfY.values).reshape(-1, 1)
# Persist the cleaned labels as they are before normalisation.
dfY.to_csv('../data/Y_train_time.csv', index=False)

print(X.shape)
print(Y.shape)

# Column-wise L2 normalisation of features and labels.
X = _l2_normalize_columns(X)
Y = _l2_normalize_columns(Y)


# Number of randomly drawn samples per training step.
batch_size = 100

# Model parameters: a (feature_size, 1) weight matrix from a seeded normal
# initialiser and a scalar bias starting at zero.
w = tf.Variable(tf.random_normal(shape=[feature_size, 1], stddev=1, seed=1))
b = tf.Variable(initial_value=0.0, name="biases", dtype=tf.float32)

# Feed points for a batch of features and the matching labels; the first
# dimension is left open so any batch size can be fed.
x = tf.placeholder(dtype=tf.float32, shape=(None, feature_size), name='x-input')
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='y-input')

# Forward pass: y = x . w + b
weighted_sum = tf.matmul(x, w)
y = tf.add(weighted_sum, b)

# Loss and back-propagation. The prediction is clipped into [1e-10, 1.0]
# so that tf.log is only applied to positive values.
# NOTE(review): y is an unbounded linear output and the clip passes no
# gradient outside its range - confirm this objective is what is intended.
clipped_y = tf.clip_by_value(y, 1e-10, 1.0)
cross_entropy = -tf.reduce_mean(y_ * tf.log(clipped_y))
adam = tf.train.AdamOptimizer(learning_rate=0.001)
train_step = adam.minimize(cross_entropy)

# Alternative kept disabled by the author: mean squared error minimised by
# gradient descent with a learning rate of 0.5.
# cross_entropy = tf.reduce_mean(tf.square(y - y_))
# optimizer = tf.train.GradientDescentOptimizer(0.5)
# train_step = optimizer.minimize(cross_entropy)
# Train on randomly drawn batches, save the trained variables, then run the
# model on X and write the predictions to a CSV file.
with tf.Session() as sess:
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    print('训练前网络参数的值为：')
    print(sess.run(w))
    print(sess.run(b))

    # The number of training steps equals the number of samples.
    STEPS = dataset_size
    for i in range(STEPS):
        # Draw batch_size row indices at random; np.random.choice samples
        # with replacement by default, so a row may occur more than once.
        rand_index = np.random.choice(dataset_size, size=batch_size)
        batch_feed = {x: X[rand_index], y_: Y[rand_index]}

        sess.run(train_step, feed_dict=batch_feed)

        if i % 100 != 0:
            continue

        # Every 100 steps, report the loss evaluated on the full data set.
        total_cross_entropy = sess.run(cross_entropy, feed_dict={x: X, y_: Y})
        print("After {} training step(s),cross entropy on all data is {}".
              format(i, total_cross_entropy))

    print('训练后网络参数的值为：')
    print(sess.run(w))
    print(sess.run(b))

    # model_path is expected to be defined outside this listing.
    save_path = saver.save(sess, model_path)

    print("complete train")

    # Predict for every row of X and store the values next to a 1-based ID
    # column; output_file_path is expected to be defined outside this listing.
    result = sess.run(y, feed_dict={x: X})
    print(result)
    df = pd.DataFrame(result)
    df['ID'] = pd.Series(df.index + 1)
    df.to_csv(output_file_path, index=False)
    print("complete predict")

TensorFlow - 批量处理数据

猜你喜欢