TensorFlow builds a neural network
TensorFlow builds a neural network
1. Build a linear regression neural network
# %%
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Generate data: 500 evenly spaced x values that simulate the line y = 2x + 6.
originX = tf.convert_to_tensor(np.linspace(-3, 3, 500), dtype = tf.double)
# Add Gaussian noise (mean 0, stddev 0.5) to the targets.
originY = 2 * originX + 6 + tf.random.normal((originX.shape[0], ), 0, 0.5, dtype = tf.double)
# %%
X = tf.cast(originX, tf.float32)
y = tf.cast(originY, tf.float32)
# Pair every feature with its target and split the pairs into batches of 32.
train = tf.data.Dataset.from_tensor_slices((X, y)).batch(32)
# Trainable parameters of the network.
# w has shape [1, 1]: one input neuron connected to one output neuron.
# b has shape [1]: the bias of the single output neuron.
w = tf.Variable(tf.random.truncated_normal([1, 1], stddev = 0.1, seed = 1))
b = tf.Variable(tf.random.truncated_normal([1], stddev = 0.1, seed = 1))
# Learning rate and number of passes over the data.
learnRate = 0.01
epoch = 10
lossAll = 0
# Use a separate loop variable so the epoch count is not overwritten.
for epochIndex in range(epoch):
    batchCount = 0
    for xTrain, yTrain in train:
        # tf.matmul needs rank-2 input, so turn the batch into a column vector.
        xTrain = tf.reshape(xTrain, (-1, 1))
        # Give the targets the same (batch, 1) shape as the predictions.
        # Otherwise (batch,) - (batch, 1) broadcasts to (batch, batch) and the
        # loss compares every target with every prediction.
        yTrain = tf.reshape(yTrain, (-1, 1))
        with tf.GradientTape() as tape:
            # Forward pass: yPred = x * w + b.
            yPred = tf.matmul(xTrain, w) + b
            # Mean squared error of this batch.
            loss = tf.reduce_mean(tf.square(yTrain - yPred))
        lossAll += loss.numpy()
        batchCount += 1
        # Derivative of the loss with respect to each trainable parameter.
        gradient = tape.gradient(loss, [w, b])
        # Gradient descent step.
        w.assign_sub(learnRate * gradient[0])
        b.assign_sub(learnRate * gradient[1])
    # Average over the batches actually seen (500 samples / 32 -> 16 batches),
    # not a hard-coded batch count.
    print(f"Epoch: {epochIndex}, Loss: {lossAll / batchCount}")
    lossAll = 0
print(w)
print(b)
# Extract the scalar slope and intercept for plotting.
w = w.numpy()[0][0]
b = b.numpy()[0]
plt.plot(originX, originY)
plt.plot(originX, w * originX + b)
plt.show()
# %%
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Generate data: 500 evenly spaced x values that simulate the line y = 2x + 6.
originX = tf.convert_to_tensor(np.linspace(-3, 3, 500), dtype = tf.double)
# Add Gaussian noise (mean 0, stddev 0.5) to the targets.
originY = 2 * originX + 6 + tf.random.normal((originX.shape[0], ), 0, 0.5, dtype = tf.double)
# %%
X = tf.cast(originX, tf.float32)
y = tf.cast(originY, tf.float32)
# Pair every feature with its target and split the pairs into batches of 32.
train = tf.data.Dataset.from_tensor_slices((X, y)).batch(32)
# Trainable parameters of the network.
# [1, 1]: one input neuron and one neuron in the next layer, which is also
# the output layer here.
# [1]: the bias of that neuron.
w = tf.Variable(tf.random.truncated_normal([1, 1], stddev = 0.1, seed = 1))
b = tf.Variable(tf.random.truncated_normal([1], stddev = 0.1, seed = 1))
# Learning rate and number of passes over the data.
learnRate = 0.01
epoch = 10
lossAll = 0
# Use a separate loop variable so the epoch count is not overwritten.
for epochIndex in range(epoch):
    stepCount = 0
    for step, (xTrain, yTrain) in enumerate(train):
        # Reshape both features and targets to (batch, 1). With the targets
        # left as (batch,), subtracting the (batch, 1) predictions would
        # broadcast to a (batch, batch) matrix and distort the loss.
        xTrain = tf.reshape(xTrain, (xTrain.shape[0], 1))
        yTrain = tf.reshape(yTrain, (yTrain.shape[0], 1))
        with tf.GradientTape() as tape:
            # Forward pass, y = x * w + b; no activation function is applied.
            yPred = tf.matmul(xTrain, w) + b
            # Mean squared error of this batch.
            loss = tf.reduce_mean(
                tf.square(yTrain - yPred)
            )
        lossAll += loss.numpy()
        stepCount = step + 1
        # Derivative of the loss with respect to each trainable parameter.
        gradient = tape.gradient(loss, [w, b])
        # Gradient descent step.
        w.assign_sub(learnRate * gradient[0])
        b.assign_sub(learnRate * gradient[1])
    # Report the mean batch loss; divide by the real number of batches
    # instead of a hard-coded 4.
    print(f"Epoch: {epochIndex}, Loss: {lossAll / stepCount}")
    lossAll = 0
print(w)
print(b)
# Extract the scalar slope and intercept for plotting.
w = w.numpy()[0][0]
b = b.numpy()[0]
plt.plot(originX, originY)
plt.plot(originX, w * originX + b)
plt.show()
2. Build a binary classification neural network
(1) Generate data
# Build a classification neural network.
# Step 1: generate the sample data.
import pandas as pd
import numpy as np

# Fix the seed so the random ages and weights are reproducible.
np.random.seed(43)
colorValues = ['black'] * 5 + ['white'] * 5
# Draw age before weight so the random stream is consumed in column order.
ageValues = np.random.rand(10) * 10
weightValues = np.random.rand(10) * 100
typeValues = ['cat'] * 5 + ['dog'] * 5
df = pd.DataFrame({
    'color': colorValues,
    'age': ageValues,
    'weight': weightValues,
    'type': typeValues,
})
print(df.head(10))
color age weight type
0 black 1.150546 39.495002 cat
1 black 6.090665 80.204712 cat
2 black 1.333910 25.442113 cat
3 black 2.405896 5.688494 cat
4 black 3.271391 86.664864 cat
5 white 8.591375 22.102900 dog
6 white 6.660902 40.498945 dog
7 white 5.411622 31.609647 dog
8 white 0.290138 7.666270 dog
9 white 7.337483 84.322469 dog
(2) one-hot encoding
Neural networks operate on numeric data, so the values of the color feature and of the type label must be one-hot encoded
# One-hot encoding.
# dtype = int keeps the indicator columns numeric (1/0). Recent pandas
# versions return booleans by default, and mixing boolean and float columns
# makes df[...].values an object array.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
oneHotType = pd.get_dummies(df['type'], dtype = int)
# Replace the text color column with the 'black' indicator (axis = 1 means column).
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
# Rename the indicator column back to 'color'.
df.rename(columns = {'black': 'color'}, inplace = True)
# Replace the text type column with the 'cat' indicator (axis = 1 means column).
df = df.drop('type', axis = 1)
df = pd.concat([df, oneHotType['cat']], axis = 1)
df.rename(columns = {'cat': 'type'}, inplace = True)
print(df)
age weight color type
0 1.150546 39.495002 1 1
1 6.090665 80.204712 1 1
2 1.333910 25.442113 1 1
3 2.405896 5.688494 1 1
4 3.271391 86.664864 1 1
5 8.591375 22.102900 0 0
6 6.660902 40.498945 0 0
7 5.411622 31.609647 0 0
8 0.290138 7.666270 0 0
9 7.337483 84.322469 0 0
It can be seen that the coded cat is 1, and the coded dog is 0
(3) Make a training set
# Assemble the training set: three feature columns and one label column.
featureFrame = df[['color', 'age', 'weight']]
labelFrame = df[['type']]
X = featureFrame.values
y = labelFrame.values
(4) Build a neural network
1) Build a neural network normally
print(X.shape)
print(y.shape)
# Build the network one layer at a time.
# Layers are referenced through tf.keras because no bare `keras` name has
# been imported at this point in the tutorial.
model = tf.keras.Sequential()
# First fully connected layer: X.shape[1] input features -> 50 units.
model.add(tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'))
# Hidden layer: 50 -> 25 units.
model.add(tf.keras.layers.Dense(25, activation = 'relu'))
# Output layer: 25 -> y.shape[1] units, sigmoid for a probability-like output.
model.add(tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid'))
# Print an overview of the model.
model.summary()
(10, 3)
(10, 1)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 50) 200
dense_2 (Dense) (None, 25) 1275
dense_3 (Dense) (None, 1) 26
=================================================================
Total params: 1,501
Trainable params: 1,501
Non-trainable params: 0
_________________________________________________________________
A Dense layer is a fully connected layer: every neuron in one layer is connected to every neuron in the adjacent layer.
2) Embedded mode to build a neural network
print(X.shape)
print(y.shape)
# Build the network by passing the full list of layers to Sequential.
# Layers are referenced through tf.keras because no bare `keras` name has
# been imported at this point in the tutorial.
model = tf.keras.Sequential([
    # First fully connected layer: X.shape[1] input features -> 50 units.
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    # Hidden layer: 50 -> 25 units.
    tf.keras.layers.Dense(25, activation = 'relu'),
    # Output layer: 25 -> y.shape[1] units, sigmoid for a probability-like output.
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
# Print an overview of the model.
model.summary()
(10, 3)
(10, 1)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 50) 200
dense_2 (Dense) (None, 25) 1275
dense_3 (Dense) (None, 1) 26
=================================================================
Total params: 1,501
Trainable params: 1,501
Non-trainable params: 0
_________________________________________________________________
(5) Configure the loss function and optimizer and train
# Configure the loss function and the optimizer, then train for 500 epochs.
compileOptions = {
    'loss': 'binary_crossentropy',
    'optimizer': 'SGD',
}
model.compile(**compileOptions)
model.fit(X, y, epochs = 500)
......
Epoch 489/500
1/1 [==============================] - 0s 0s/step - loss: 0.3395
Epoch 490/500
1/1 [==============================] - 0s 0s/step - loss: 0.3512
Epoch 491/500
1/1 [==============================] - 0s 10ms/step - loss: 0.3420
Epoch 492/500
1/1 [==============================] - 0s 0s/step - loss: 0.3536
Epoch 493/500
1/1 [==============================] - 0s 0s/step - loss: 0.3433
Epoch 494/500
1/1 [==============================] - 0s 0s/step - loss: 0.3570
Epoch 495/500
1/1 [==============================] - 0s 0s/step - loss: 0.3676
Epoch 496/500
1/1 [==============================] - 0s 0s/step - loss: 0.3716
Epoch 497/500
1/1 [==============================] - 0s 0s/step - loss: 0.3418
Epoch 498/500
1/1 [==============================] - 0s 0s/step - loss: 0.3489
Epoch 499/500
1/1 [==============================] - 0s 0s/step - loss: 0.3333
Epoch 500/500
1/1 [==============================] - 0s 10ms/step - loss: 0.3408
`binary_crossentropy` is the binary cross-entropy loss, used for binary classification.
`SGD` is the stochastic gradient descent optimizer.
(6) test
# Test: build one black sample with a random age and weight.
# All columns must have the same length, so 'color' holds a single value to
# match the single random age and weight.
xTest = pd.DataFrame({
    'color': ['black'],
    'age': np.random.rand(1) * 10,
    'weight': np.random.rand(1) * 100
})
# dtype = int keeps the indicator numeric instead of boolean.
oneHotColor = pd.get_dummies(xTest['color'], dtype = int)
# Replace the text color column with the 'black' indicator (axis = 1 means column).
xTest = xTest.drop('color', axis = 1)
xTest = pd.concat([xTest, oneHotColor['black']], axis = 1)
# Rename the indicator column back to 'color'.
xTest.rename(columns = {'black': 'color'}, inplace = True)
print(xTest)
# Feed the features in the order used for training (color, age, weight).
# After the concat above the frame is ordered age, weight, color.
print(model.predict(xTest[['color', 'age', 'weight']].values))
age weight color
0 3.853769 95.448813 1
1/1 [==============================] - 0s 70ms/step
[[1.8637778e-09]]
The predicted value is close to 0, so this sample is predicted to be a dog
(7) Complete process
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Fix the seed so the random ages and weights are reproducible.
np.random.seed(43)
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'type': ['cat'] * 5 + ['dog'] * 5,
})
print(df[0:10])
# %%
# One-hot encoding.
# dtype = int keeps the indicator columns numeric (1/0). Recent pandas
# versions return booleans by default, which would turn the feature matrix
# into an object array.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
oneHotType = pd.get_dummies(df['type'], dtype = int)
# Replace the text color column with the 'black' indicator (axis = 1 means column).
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
# Rename the indicator column back to 'color'.
df.rename(columns = {'black': 'color'}, inplace = True)
# Replace the text type column with the 'cat' indicator (axis = 1 means column).
df = df.drop('type', axis = 1)
df = pd.concat([df, oneHotType['cat']], axis = 1)
df.rename(columns = {'cat': 'type'}, inplace = True)
print(df)
# %%
# Assemble the training set. The same column order is reused for prediction.
featureColumns = ['color', 'age', 'weight']
X = df[featureColumns].values
y = df[['type']].values
print(X)
print(y)
# %%
print(X.shape)
print(y.shape)
# Build the network one layer at a time.
model = tf.keras.Sequential()
# First fully connected layer: X.shape[1] input features -> 50 units.
model.add(keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'))
# Hidden layer: 50 -> 25 units.
model.add(keras.layers.Dense(25, activation = 'relu'))
# Output layer: 25 -> y.shape[1] units.
model.add(keras.layers.Dense(y.shape[1], activation = 'sigmoid'))
# Print an overview of the model.
model.summary()
# %%
print(X.shape)
print(y.shape)
# Build the same network by passing the list of layers to Sequential.
# This replaces the model built in the previous cell.
model = tf.keras.Sequential([
    # First fully connected layer: X.shape[1] input features -> 50 units.
    keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    # Hidden layer: 50 -> 25 units.
    keras.layers.Dense(25, activation = 'relu'),
    # Output layer: 25 -> y.shape[1] units.
    keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
# Print an overview of the model.
model.summary()
# %%
# Configure the loss function and the optimizer, then train.
model.compile(
    loss = 'binary_crossentropy', optimizer = 'SGD'
)
model.fit(X, y, epochs = 500)
# %%
# Test: build one black sample with a random age and weight.
xTest = pd.DataFrame({
    'color': ['black'],
    'age': np.random.rand(1) * 10,
    'weight': np.random.rand(1) * 100
})
oneHotColor = pd.get_dummies(xTest['color'], dtype = int)
# Replace the text color column with the 'black' indicator (axis = 1 means column).
xTest = xTest.drop('color', axis = 1)
xTest = pd.concat([xTest, oneHotColor['black']], axis = 1)
# Rename the indicator column back to 'color'.
xTest.rename(columns = {'black': 'color'}, inplace = True)
print(xTest)
# Select the features in training order. After the concat above the frame is
# ordered age, weight, color, which does not match the layout of X.
print(model.predict(xTest[featureColumns].values))
3. Building a Regression Neural Network
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Sample data: the regression target is the continuous 'sleep_time' column.
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'sleep_time': np.random.rand(10) * 24
})
# dtype = int keeps the indicator numeric (1/0). Recent pandas versions
# return booleans by default, which would make the feature matrix an object array.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
print(oneHotColor)
# Replace the text color column with the 'black' indicator.
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df.rename(columns = {'black': 'color'}, inplace = True)
print(df)
X = df[['color', 'age', 'weight']].values
y = df['sleep_time']
# Fully connected network; the single output unit has no activation so it
# can produce any real value.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(1)
])
model.summary()
# Use the stable optimizer namespace; the `experimental` alias is not
# available in newer Keras releases.
model.compile(loss = tf.keras.losses.MeanSquaredError(),
              optimizer = tf.keras.optimizers.SGD())
model.fit(X, y, epochs = 500)
# Predict every row in a single call instead of one predict() call per row.
yPredict = list(model.predict(X)[:, 0])
print(yPredict)
# Plot the true values against the predictions, one point per sample.
sampleIndex = range(len(yPredict))
plt.plot(sampleIndex, df['sleep_time'].values, label = 'origin')
plt.plot(sampleIndex, yPredict, label = 'predict')
plt.legend()
plt.show()
4. Use the class method to build a neural network
The `__init__()` function defines the layers that the model uses, and the `call()` function defines the forward propagation of the neural network.
Take the regression neural network as an example:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class MyModel(tf.keras.Model):
    """Fully connected network with layers of 50, 25 and 1 units."""

    def __init__(self, X, y):
        """Create the three Dense layers.

        X: feature array; X.shape[1] is passed to the first layer as input_dim.
        y: not used by the constructor.
        """
        super().__init__()
        inputWidth = X.shape[1]
        self.d1 = tf.keras.layers.Dense(50, input_dim = inputWidth, activation = 'relu')
        self.d2 = tf.keras.layers.Dense(25, activation = 'relu')
        self.d3 = tf.keras.layers.Dense(1)

    def call(self, X):
        """Forward pass: feed X through d1, d2 and d3 in that order."""
        return self.d3(self.d2(self.d1(X)))
# Sample data: the regression target is the continuous 'sleep_time' column.
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'sleep_time': np.random.rand(10) * 24
})
# dtype = int keeps the indicator numeric (1/0). Recent pandas versions
# return booleans by default, which would make the feature matrix an object array.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
print(oneHotColor)
# Replace the text color column with the 'black' indicator.
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df.rename(columns = {'black': 'color'}, inplace = True)
print(df)
X = df[['color', 'age', 'weight']].values
y = df['sleep_time']
model = MyModel(X, y)
# Use the stable optimizer namespace; the `experimental` alias is not
# available in newer Keras releases.
model.compile(loss = tf.keras.losses.MeanSquaredError(),
              optimizer = tf.keras.optimizers.Adam())
model.fit(X, y, epochs = 500)
# Predict every row in a single call instead of one predict() call per row.
yPredict = list(model.predict(X)[:, 0])
print(yPredict)
# Plot the true values against the predictions, one point per sample.
sampleIndex = range(len(yPredict))
plt.plot(sampleIndex, df['sleep_time'].values, label = 'origin')
plt.plot(sampleIndex, yPredict, label = 'predict')
plt.legend()
plt.show()
5. Model training, saving and loading
(1) Dropout layer
Dropout temporarily discards some neurons with a given probability while a deep neural network is being trained. The discarded neurons do not take part in the current training step, which reduces the number of parameters being trained at any one time. Using Dropout can alleviate overfitting.
Add the Dropout layer to the built network
# Same layer stack as before, with a Dropout layer after the first Dense layer.
dropoutStack = [
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(1),
]
model = tf.keras.Sequential(dropoutStack)
Here 0.2 represents a ratio, that is, to deactivate 20% of neurons
(2) EarlyStopping
EarlyStopping means stopping the training early; it is usually regarded as a regularization method that helps avoid overfitting the network
Principle: stop training before the model overfits, keeping the weight parameters close to their best state so far
Use the `EarlyStopping` callback from `tf.keras.callbacks`, together with a validation set
import tensorflow as tf
import pandas as pd
import numpy as np

# Fix the seed so the sample data and the split below are reproducible.
np.random.seed(43)
df = pd.DataFrame({
    'color': ['black'] * 10 + ['white'] * 10,
    'age': np.random.rand(20) * 10,
    'weight': np.random.rand(20) * 100,
    'type': ['cat'] * 10 + ['dog'] * 10,
})
# dtype = int keeps the indicators numeric (1/0). Recent pandas versions
# return booleans by default, which would make the feature matrix an object array.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
oneHotType = pd.get_dummies(df['type'], dtype = int)
# Replace the text columns with their indicator columns.
df = df.drop(['color'], axis = 1)
df = df.drop(['type'], axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df = pd.concat([df, oneHotType['cat']], axis = 1)
df.rename(columns = {
    'black': 'color',
    'cat': 'type'
}, inplace = True)
X = df[['color', 'age', 'weight']].values
y = df[['type']].values
# Shuffle before splitting: the rows are ordered (10 cats, then 10 dogs), so
# taking the last 5 rows unshuffled would give a validation set of dogs only.
shuffledIndex = np.random.permutation(len(X))
trainIndex = shuffledIndex[:15]
validIndex = shuffledIndex[15:]
xTrain = X[trainIndex]
yTrain = y[trainIndex]
xValid = X[validIndex]
yValid = y[validIndex]
# Stop once val_loss has failed to improve for 4 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 4)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
model.summary()
# Use the stable optimizer namespace; the `experimental` alias is not
# available in newer Keras releases.
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.Adam())
# Train on the training split only. Fitting on the full X, y would let the
# model see the validation rows and make val_loss meaningless.
model.fit(xTrain, yTrain, epochs = 500, validation_data = (xValid, yValid), callbacks = [callback])
......
Epoch 22/500
1/1 [==============================] - 0s 22ms/step - loss: 2.4001 - val_loss: 0.1362
Epoch 23/500
1/1 [==============================] - 0s 21ms/step - loss: 2.5386 - val_loss: 0.1242
Epoch 24/500
1/1 [==============================] - 0s 20ms/step - loss: 1.5711 - val_loss: 0.1143
Epoch 25/500
1/1 [==============================] - 0s 20ms/step - loss: 2.0007 - val_loss: 0.1113
Epoch 26/500
1/1 [==============================] - 0s 20ms/step - loss: 1.5747 - val_loss: 0.1105
Epoch 27/500
1/1 [==============================] - 0s 22ms/step - loss: 2.8826 - val_loss: 0.1147
Epoch 28/500
1/1 [==============================] - 0s 23ms/step - loss: 1.8719 - val_loss: 0.1209
Epoch 29/500
1/1 [==============================] - 0s 24ms/step - loss: 2.5716 - val_loss: 0.1323
Epoch 30/500
1/1 [==============================] - 0s 26ms/step - loss: 2.2206 - val_loss: 0.1505
The results show that training stopped after 30 epochs
The value of val_loss kept increasing over the last four epochs (from 0.1105 at epoch 26 to 0.1505 at epoch 30), which indicates that the model was starting to overfit, so EarlyStopping was triggered
(3) Save the model
There are two ways to save a model: through the ModelCheckpoint callback, or by saving it as an h5 file
(1) checkpoints
Use the callback function checkpoint to set related parameters
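tf.keras.callbacks.ModelCheckpoint(
    filepath,                          # path where the checkpoint is saved
    monitor: str = 'val_loss',         # value to monitor
    verbose: int = 0,                  # verbosity: 0 = silent, 1 = verbose
    save_best_only: bool = False,      # whether to keep only the best model parameters
    save_weights_only: bool = False,   # whether to save only the weights; if False, the whole model is saved
)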
# Stop once val_loss has failed to improve for 4 consecutive epochs.
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 4)
# Save only the weights, and only when the monitored value improves.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = 'training_model/cp.ckpt',
    save_best_only = True,
    save_weights_only = True,
    verbose = 1
)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
model.summary()
# Use the stable optimizer namespace; the `experimental` alias is not
# available in newer Keras releases.
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.Adam())
# Train on the training split only, so that the val_loss watched by both
# callbacks is measured on rows the model has not been fitted on.
model.fit(xTrain, yTrain, epochs = 500, validation_data = (xValid, yValid), callbacks = [earlyStopping, checkpoint])
Use load_weights() to read the saved model parameters
# Load the saved model weights.
# The network must be rebuilt with the same layers before the weights from
# the checkpoint can be loaded into it.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])
model.summary()
# Use the stable optimizer namespace; the `experimental` alias is not
# available in newer Keras releases.
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.Adam())
model.load_weights('./training_model/cp.ckpt')
(2) h5 file
Save
# Save the model to a single HDF5 (.h5) file.
model.save('./training_model/mymodel.h5')
Load
# Restore the model from the HDF5 (.h5) file.
model = tf.keras.models.load_model('./training_model/mymodel.h5')
Note: the HDF5 format does not save the weights of `optimizer_experimental.Optimizer` optimizers