TensorFlow - 批量处理数据
flyfish
import tensorflow as tf
import numpy as np
# Sequential mini-batch training of a single linear layer on synthetic data
# (TensorFlow 1.x graph/session API).
feature_size = 50
dataset_size = 255

# Synthetic data: uniform random features and targets drawn from [0, 1).
X = np.random.rand(dataset_size, feature_size)
print(X.shape)
Y = np.random.rand(dataset_size, 1)
# Y = Y.reshape(-1, 1)
print(Y.shape)

# Number of samples fed to the optimizer on each training step.
batch_size = 100

# Model parameters: one weight per feature plus a scalar bias.
w = tf.Variable(tf.random_normal([feature_size, 1], stddev=1, seed=1))
b = tf.Variable(0.0, name="biases", dtype=tf.float32)

# Placeholders take a variable number of rows (None) so that both a
# mini-batch and the whole dataset can be fed through the same graph.
x = tf.placeholder(tf.float32, shape=(None, feature_size), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

# Forward pass: y = x . w + b
y = tf.add(tf.matmul(x, w), b)

# Loss and back-propagation.
# The prediction is clipped to [1e-10, 1.0] before the log so log(0) is
# never evaluated.
# NOTE(review): this is a cross-entropy style loss applied to an unbounded
# linear output with real-valued targets; the disabled squared-error variant
# below may be the intended loss for this data -- confirm.
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
# Disabled alternative: mean squared error with plain gradient descent.
# cross_entropy = tf.reduce_mean(tf.square(y - y_))
# optimizer = tf.train.GradientDescentOptimizer(0.0000001)
# train_step = optimizer.minimize(cross_entropy)

with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    print('训练前网络参数的值为:')
    print(sess.run(w))
    print(sess.run(b))

    # Number of training steps.
    STEPS = 5000
    for i in range(STEPS):
        # Walk through the dataset in consecutive windows of batch_size
        # rows, wrapping around with the modulo; `min` truncates the last
        # window so the slice never runs past the end of the data.
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)

        # Run one optimizer update on the selected window.
        sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})

        if i % 1000 == 0:
            # Periodically report the loss measured on the whole dataset.
            total_cross_entropy = sess.run(
                cross_entropy, feed_dict={x: X, y_: Y})
            print("After {} training step(s),cross entropy on all data is {}".
                  format(i, total_cross_entropy))

    print('训练后网络参数的值为:')
    print(sess.run(w))
    print(sess.run(b))
批量随机选择数据进行训练
X #特征
Y #标签
X_pred #要预测的数据
# Random mini-batch training of a single linear layer, followed by saving the
# model and writing predictions to CSV (TensorFlow 1.x graph/session API).
#
# NOTE(review): this snippet relies on names that are not defined in the
# visible source -- `pd` (presumably `import pandas as pd`), `model_path` and
# `output_file_path` -- and on `X` / `Y` already holding the features and
# labels. Confirm they are provided before this point.
dataset_size, feature_size = X.shape

# Replace missing values with 0.0 and convert back to plain arrays.
dfX = pd.DataFrame(X)
dfX.fillna(0.0, inplace=True)
X = np.array(dfX.values)

dfY = pd.DataFrame(Y)
dfY.fillna(0.0, inplace=True)
Y = np.array(dfY.values)
Y = Y.reshape(-1, 1)  # labels as a single column
dfY.to_csv('../data/Y_train_time.csv', index=False)

print(X.shape)
print(Y.shape)

# L2-normalize features and labels along dimension 0. Both tensors are
# evaluated in one short-lived session instead of opening a session for each.
X1 = tf.nn.l2_normalize(X, dim=0)
Y1 = tf.nn.l2_normalize(Y, dim=0)
with tf.Session() as sess:
    X, Y = sess.run([X1, Y1])

# Number of samples fed to the optimizer on each training step.
batch_size = 100

# Model parameters: one weight per feature plus a scalar bias.
w = tf.Variable(tf.random_normal([feature_size, 1], stddev=1, seed=1))
b = tf.Variable(0.0, name="biases", dtype=tf.float32)

# Placeholders take a variable number of rows (None) so that both a
# mini-batch and the whole dataset can be fed through the same graph.
x = tf.placeholder(tf.float32, shape=(None, feature_size), name='x-input')
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='y-input')

# Forward pass: y = x . w + b
y = tf.add(tf.matmul(x, w), b)

# Loss and back-propagation.
# The prediction is clipped to [1e-10, 1.0] before the log so log(0) is
# never evaluated.
# NOTE(review): this is a cross-entropy style loss applied to an unbounded
# linear output; the disabled squared-error variant below may be the
# intended loss -- confirm.
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
# Disabled alternative: mean squared error with plain gradient descent.
# cross_entropy = tf.reduce_mean(tf.square(y - y_))
# optimizer = tf.train.GradientDescentOptimizer(0.5)
# train_step = optimizer.minimize(cross_entropy)

with tf.Session() as sess:
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    sess.run(init_op)

    print('训练前网络参数的值为:')
    print(sess.run(w))
    print(sess.run(b))

    # Number of training steps; each step draws a random mini-batch.
    STEPS = dataset_size
    for i in range(STEPS):
        # Pick batch_size row indices at random (np.random.choice samples
        # with replacement by default, so a row may repeat within a batch).
        rand_index = np.random.choice(dataset_size, size=(batch_size))
        rand_x = X[rand_index]
        rand_y = Y[rand_index]
        sess.run(train_step, feed_dict={x: rand_x, y_: rand_y})

        if i % 100 == 0:
            # Periodically report the loss measured on the whole dataset.
            total_cross_entropy = sess.run(
                cross_entropy, feed_dict={x: X, y_: Y})
            print("After {} training step(s),cross entropy on all data is {}".
                  format(i, total_cross_entropy))

    print('训练后网络参数的值为:')
    print(sess.run(w))
    print(sess.run(b))

    # Persist the trained variables while the session is still open.
    save_path = saver.save(sess, model_path)
    print("complete train")

    # Predictions must also be computed before the session closes.
    result = sess.run(y, feed_dict={x: X})

print(result)

# Write the predictions with a 1-based ID column.
df = pd.DataFrame(result)
df['ID'] = pd.Series(df.index + 1)
df.to_csv(output_file_path, index=False)
print("complete predict")