TensorFlow如何工作?什么是机器学习模型?什么是神经网络?神经网络如何学习?如何处理数据并将其传递给神经网络输入?如何运行模型并获得预测结果?
用神经网络和TensorFlow进行文本分类
# -*- coding:utf-8 -*-
# 用神经网络和TensorFlow分类文本
import numpy as np
import tensorflow as tf
from collections import Counter
from sklearn.datasets import fetch_20newsgroups
# Restrict the corpus to three newsgroups, making this a 3-way classification task.
categories = ["comp.graphics", "sci.space", "rec.sport.baseball"]
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)

print('total texts in train:', len(newsgroups_train.data))
print('total texts in test:', len(newsgroups_test.data))

# Count every lower-cased, space-separated token across BOTH splits, so that
# any word met while vectorising a test document also has a vocabulary entry.
vocab = Counter()
for dataset in (newsgroups_train, newsgroups_test):
    for text in dataset.data:
        vocab.update(word.lower() for word in text.split(' '))

print("Total words:", len(vocab))
total_words = len(vocab)
def get_word_2_index(vocab):
    """Return a dict mapping each lower-cased word in *vocab* to its position.

    Args:
        vocab: Any iterable of strings (iterating a ``Counter`` or ``dict``
            yields its keys).

    Returns:
        A dict ``{word.lower(): i}`` where ``i`` is the word's position in
        the iteration order of *vocab*. If two entries differ only by case,
        the position of the later one is kept.
    """
    return {word.lower(): i for i, word in enumerate(vocab)}
# Word -> vector-position lookup built from the full vocabulary counted above.
word2index = get_word_2_index(vocab)
def get_batch(df, i, batch_size, vocab_index=None, vocab_size=None):
    """Build the bag-of-words inputs and one-hot labels for batch number *i*.

    Args:
        df: Object exposing ``data`` (sequence of text strings) and
            ``target`` (sequence of integer class ids), sliced in parallel.
        i: Zero-based batch number; the batch covers items
            ``i * batch_size`` up to ``i * batch_size + batch_size``.
        batch_size: Number of items per batch. A slice that runs past the
            end of the data simply yields fewer items.
        vocab_index: Optional word -> position mapping. Defaults to the
            module-level ``word2index``.
        vocab_size: Optional length of each input vector. Defaults to the
            module-level ``total_words``.

    Returns:
        A tuple ``(inputs, labels)`` of NumPy arrays: one row of word counts
        per text, and one length-3 one-hot row per target.

    Raises:
        KeyError: If a lower-cased token is missing from the word index
            (when the index is a plain dict).
    """
    index = word2index if vocab_index is None else vocab_index
    size = total_words if vocab_size is None else vocab_size

    start = i * batch_size
    stop = start + batch_size
    texts = df.data[start:stop]
    labels = df.target[start:stop]

    # One count vector per text: position k holds how many times word k occurs.
    batches = []
    for text in texts:
        layer = np.zeros(size, dtype=float)
        for word in text.split(' '):
            layer[index[word.lower()]] += 1
        batches.append(layer)

    # One-hot encode the class id; any id other than 0 or 1 goes to slot 2.
    results = []
    for label in labels:
        y = np.zeros(3, dtype=float)
        y[label if label in (0, 1) else 2] = 1
        results.append(y)

    return np.array(batches), np.array(results)
# Training hyper-parameters.
learning_rate, training_epochs = 0.01, 10
batch_size, display_step = 150, 1

# Network dimensions.
n_hidden_1 = n_hidden_2 = 100  # units in the 1st and 2nd hidden layers
n_input = total_words  # one input feature per vocabulary word
n_classes = 3  # graphics, sci.space and baseball

# Graph inputs; the leading None leaves the batch dimension unspecified.
input_tensor = tf.placeholder(tf.float32, shape=[None, n_input], name='input')
output_tensor = tf.placeholder(tf.float32, shape=[None, n_classes], name='output')
def multilayer_perceptron(input_tensor, weights, biases):
    """Build a two-hidden-layer feed-forward network and return its output op.

    Args:
        input_tensor: Tensor fed into the first layer.
        weights: Mapping with keys ``'h1'``, ``'h2'`` and ``'out'`` holding
            the weight matrices of the two hidden layers and the output layer.
        biases: Mapping with keys ``'b1'``, ``'b2'`` and ``'out'`` holding
            the matching bias terms.

    Returns:
        The output layer's ``matmul + bias`` tensor. No activation is applied
        to it, so the values are raw scores (logits).
    """
    # Hidden layer 1 with ReLU activation.
    layer_1_multiplication = tf.matmul(input_tensor, weights['h1'])
    layer_1_addition = tf.add(layer_1_multiplication, biases['b1'])
    layer_1 = tf.nn.relu(layer_1_addition)

    # Hidden layer 2 with ReLU activation.
    layer_2_multiplication = tf.matmul(layer_1, weights['h2'])
    layer_2_addition = tf.add(layer_2_multiplication, biases['b2'])
    layer_2 = tf.nn.relu(layer_2_addition)

    # Output layer: linear only.
    out_layer_multiplication = tf.matmul(layer_2, weights['out'])
    out_layer_addition = tf.add(out_layer_multiplication, biases['out'])
    return out_layer_addition
# Trainable parameters, each initialised from tf.random_normal.
weights = dict(
    h1=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    h2=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    out=tf.Variable(tf.random_normal([n_hidden_2, n_classes])),
)
biases = dict(
    b1=tf.Variable(tf.random_normal([n_hidden_1])),
    b2=tf.Variable(tf.random_normal([n_hidden_2])),
    out=tf.Variable(tf.random_normal([n_classes])),
)

# Wire the network: the model output is the tensor of raw class scores.
prediction = multilayer_perceptron(input_tensor, weights, biases)

# Mean softmax cross-entropy over the batch, minimised with Adam.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        logits=prediction,
        labels=output_tensor,
    )
)
optimizer = tf.train.AdamOptimizer(
    learning_rate=learning_rate,
).minimize(loss)

# Op that assigns every variable its initial value when run in a session.
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    # Variables must be initialised before any op that reads them is run.
    sess.run(init)

    # Number of full batches per epoch; a trailing partial batch is not used.
    total_batch = len(newsgroups_train.data) // batch_size

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = get_batch(newsgroups_train, i, batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            c, _ = sess.run([loss, optimizer],
                            feed_dict={input_tensor: batch_x, output_tensor: batch_y})
            # Accumulate this batch's share of the epoch's mean loss
            avg_cost += c / total_batch
        # Report once per epoch, after every batch has contributed to the mean
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "loss=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model: a prediction is correct when the highest-scoring class
    # matches the position of the 1 in the one-hot label.
    correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(output_tensor, 1))
    # Accuracy is the mean of the 0/1 correctness values.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # Evaluate on the whole test split as a single batch.
    total_test_data = len(newsgroups_test.target)
    batch_x_test, batch_y_test = get_batch(newsgroups_test, 0, total_test_data)
    print("Accuracy:", accuracy.eval({input_tensor: batch_x_test, output_tensor: batch_y_test}))
total texts in train: 1774
total texts in test: 1180
Total words: 119930
2017-12-27 16:25:00.121692: I C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\platform\cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX AVX2
Epoch: 0001 loss= 164.599542791
Epoch: 0001 loss= 229.308005593
Epoch: 0001 loss= 312.437832919
Epoch: 0001 loss= 419.234974254
Epoch: 0001 loss= 491.679138184
Epoch: 0001 loss= 628.423256614
Epoch: 0001 loss= 716.383273038
Epoch: 0001 loss= 767.755315607
Epoch: 0001 loss= 804.416395708
Epoch: 0001 loss= 839.817649148
Epoch: 0001 loss= 873.678663774
Epoch: 0002 loss= 12.155790849
Epoch: 0002 loss= 37.855947321
Epoch: 0002 loss= 61.012716120
Epoch: 0002 loss= 89.293117176
Epoch: 0002 loss= 95.758963845
Epoch: 0002 loss= 106.525867115
Epoch: 0002 loss= 140.357190912
Epoch: 0002 loss= 156.909991871
Epoch: 0002 loss= 188.811081626
Epoch: 0002 loss= 201.752990029
Epoch: 0002 loss= 223.565703652
Epoch: 0003 loss= 22.479526867
Epoch: 0003 loss= 36.388277921
Epoch: 0003 loss= 40.809483615
Epoch: 0003 loss= 45.898767298
Epoch: 0003 loss= 54.971788927
Epoch: 0003 loss= 92.455045873
Epoch: 0003 loss= 105.125879114
Epoch: 0003 loss= 107.778049989
Epoch: 0003 loss= 109.214097977
Epoch: 0003 loss= 109.673732237
Epoch: 0003 loss= 114.287493446
Epoch: 0004 loss= 20.967838634
Epoch: 0004 loss= 36.625368985
Epoch: 0004 loss= 56.412753018
Epoch: 0004 loss= 61.769968900
Epoch: 0004 loss= 69.876602173
Epoch: 0004 loss= 71.778213328
Epoch: 0004 loss= 74.627652255
Epoch: 0004 loss= 74.787704804
Epoch: 0004 loss= 75.873412468
Epoch: 0004 loss= 80.153322816
Epoch: 0004 loss= 95.818399332
Epoch: 0005 loss= 1.251875010
Epoch: 0005 loss= 4.656712185
Epoch: 0005 loss= 5.162945314
Epoch: 0005 loss= 5.383128795
Epoch: 0005 loss= 6.082911080
Epoch: 0005 loss= 7.967948935
Epoch: 0005 loss= 7.992210087
Epoch: 0005 loss= 7.992210087
Epoch: 0005 loss= 8.931116670
Epoch: 0005 loss= 9.138003655
Epoch: 0005 loss= 9.726748903
Epoch: 0006 loss= 10.962064570
Epoch: 0006 loss= 15.842136730
Epoch: 0006 loss= 19.281685569
Epoch: 0006 loss= 20.565143499
Epoch: 0006 loss= 21.748276104
Epoch: 0006 loss= 21.754320296
Epoch: 0006 loss= 21.754320296
Epoch: 0006 loss= 21.754320296
Epoch: 0006 loss= 21.808484375
Epoch: 0006 loss= 21.808484375
Epoch: 0006 loss= 29.417288557
Epoch: 0007 loss= 1.706949928
Epoch: 0007 loss= 2.966487364
Epoch: 0007 loss= 2.966487364
Epoch: 0007 loss= 2.966487364
Epoch: 0007 loss= 2.966487364
Epoch: 0007 loss= 2.966487364
Epoch: 0007 loss= 3.028692327
Epoch: 0007 loss= 3.216852399
Epoch: 0007 loss= 3.259427713
Epoch: 0007 loss= 3.259427713
Epoch: 0007 loss= 3.259427713
Epoch: 0008 loss= 0.000000000
Epoch: 0008 loss= 0.000000000
Epoch: 0008 loss= 0.021928859
Epoch: 0008 loss= 0.223925532
Epoch: 0008 loss= 0.517013535
Epoch: 0008 loss= 0.517013535
Epoch: 0008 loss= 0.517013535
Epoch: 0008 loss= 0.677528009
Epoch: 0008 loss= 0.677528009
Epoch: 0008 loss= 0.677528009
Epoch: 0008 loss= 0.677528009
Epoch: 0009 loss= 2.619076642
Epoch: 0009 loss= 2.619076642
Epoch: 0009 loss= 2.619076642
Epoch: 0009 loss= 2.674342090
Epoch: 0009 loss= 2.674342090
Epoch: 0009 loss= 2.674342090
Epoch: 0009 loss= 2.674342090
Epoch: 0009 loss= 2.674342090
Epoch: 0009 loss= 2.674342090
Epoch: 0009 loss= 2.674342090
Epoch: 0009 loss= 2.674342090
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000655781
Epoch: 0010 loss= 1.000675207
Epoch: 0010 loss= 1.000675207
Optimization Finished!
Accuracy: 0.723729
Process finished with exit code 0