TensorFlow builds a neural network

TensorFlow builds a neural network

TensorFlow builds a neural network

1. Build a regression neural network

# %%
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


# Generate data: 500 samples of the line y = 2x + 6.
originX = tf.convert_to_tensor(np.linspace(-3, 3, 500), dtype = tf.double)
# Add Gaussian noise (mean 0, stddev 0.5) to the targets.
originY = 2 * originX + 6 + tf.random.normal((originX.shape[0], ), 0, 0.5, dtype = tf.double)

# %%
X = tf.cast(originX, tf.float32)
y = tf.cast(originY, tf.float32)

# Pair every x with its y and split the samples into mini-batches of 32.
train = tf.data.Dataset.from_tensor_slices((X, y)).batch(32)

# Trainable parameters of a single linear neuron:
# w has shape [1, 1] (one input feature -> one output), b has shape [1] (bias).
w = tf.Variable(tf.random.truncated_normal([1, 1], stddev = 0.1, seed = 1))
b = tf.Variable(tf.random.truncated_normal([1], stddev = 0.1, seed = 1))

# Learning rate and number of passes over the data.
learnRate = 0.01
epochs = 10

for epoch in range(epochs):
    lossAll = 0.0
    steps = 0
    for xTrain, yTrain in train:
        # Give inputs AND targets the shape (batch, 1). Leaving the targets
        # as (batch,) would broadcast `yTrain - yPred` to (batch, batch) and
        # compare every target with every prediction.
        xTrain = tf.reshape(xTrain, (-1, 1))
        yTrain = tf.reshape(yTrain, (-1, 1))
        with tf.GradientTape() as tape:
            # Forward pass: y = x @ w + b
            yPred = tf.matmul(xTrain, w) + b
            # Mean squared error loss
            loss = tf.reduce_mean(tf.square(yTrain - yPred))
        # Differentiate the loss with respect to each parameter
        # (outside the tape, so the gradient computation itself is not recorded).
        gradient = tape.gradient(loss, [w, b])
        # Gradient descent step
        w.assign_sub(learnRate * gradient[0])
        b.assign_sub(learnRate * gradient[1])
        lossAll += loss.numpy()
        steps += 1

    # Average over the number of batches actually processed in this epoch.
    print(f"Epoch: {epoch}, Loss: {lossAll / steps}")

print(w)
print(b)

# Extract plain scalars for plotting.
w = w.numpy()[0][0]
b = b.numpy()[0]

xs = originX.numpy()
plt.plot(xs, originY.numpy())
plt.plot(xs, w * xs + b)
plt.show()
# %%
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt


# Generate data: 500 samples of the line y = 2x + 6.
originX = tf.convert_to_tensor(np.linspace(-3, 3, 500), dtype = tf.double)
# Add Gaussian noise (mean 0, stddev 0.5) to the targets.
originY = 2 * originX + 6 + tf.random.normal((originX.shape[0], ), 0, 0.5, dtype = tf.double)

# %%
X = tf.cast(originX, tf.float32)
y = tf.cast(originY, tf.float32)

# Pair every x with its y and split the samples into mini-batches of 32.
train = tf.data.Dataset.from_tensor_slices((X, y)).batch(32)

# Trainable parameters.
# [1, 1]: one input neuron connected to one neuron that is both the hidden
# layer and the output layer.
# [1]: the bias of that neuron.
w = tf.Variable(tf.random.truncated_normal([1, 1], stddev = 0.1, seed = 1))
b = tf.Variable(tf.random.truncated_normal([1], stddev = 0.1, seed = 1))

# Learning rate and number of passes over the data.
learnRate = 0.01
epochs = 10

for epoch in range(epochs):
    lossAll = 0.0
    steps = 0
    for xTrain, yTrain in train:
        # Inputs and targets must both be (batch, 1); a (batch,) target would
        # broadcast against the (batch, 1) prediction into a (batch, batch)
        # matrix and corrupt the loss.
        xTrain = tf.reshape(xTrain, (-1, 1))
        yTrain = tf.reshape(yTrain, (-1, 1))
        with tf.GradientTape() as tape:
            # Forward pass: y = x @ w + b (no activation function is applied)
            yPred = tf.matmul(xTrain, w) + b
            # Mean squared error loss
            loss = tf.reduce_mean(tf.square(yTrain - yPred))
        # Differentiate the loss with respect to each parameter.
        gradient = tape.gradient(loss, [w, b])
        # Gradient descent step
        w.assign_sub(learnRate * gradient[0])
        b.assign_sub(learnRate * gradient[1])
        lossAll += loss.numpy()
        steps += 1

    # Average over the number of batches actually processed in this epoch.
    print(f"Epoch: {epoch}, Loss: {lossAll / steps}")

print(w)
print(b)

# Extract plain scalars for plotting.
w = w.numpy()[0][0]
b = b.numpy()[0]

xs = originX.numpy()
plt.plot(xs, originY.numpy())
plt.plot(xs, w * xs + b)
plt.show()

2. Build a binary classification neural network

(1) Generate data

# Build a classification neural network (cat vs. dog)
# Generate the data
import pandas as pd
import numpy as np


# Fixed seed so the random columns are reproducible.
np.random.seed(43)
# Draw the random columns in the same order as they appear in the frame.
ages = np.random.rand(10) * 10
weights = np.random.rand(10) * 100
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': ages,
    'weight': weights,
    'type': ['cat'] * 5 + ['dog'] * 5,
})
print(df.head(10))

   color       age     weight type
0  black  1.150546  39.495002  cat
1  black  6.090665  80.204712  cat
2  black  1.333910  25.442113  cat
3  black  2.405896   5.688494  cat
4  black  3.271391  86.664864  cat
5  white  8.591375  22.102900  dog
6  white  6.660902  40.498945  dog
7  white  5.411622  31.609647  dog
8  white  0.290138   7.666270  dog
9  white  7.337483  84.322469  dog

(2) one-hot encoding

Since a neural network stores all data as numbers, the values of the color feature and of the type label must be one-hot encoded.

# One-hot encoding
# dtype = int keeps the indicator columns numeric (1/0). Recent pandas
# releases return booleans by default, which would turn the feature matrix
# built from this frame into an object array.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
oneHotType = pd.get_dummies(df['type'], dtype = int)
# Drop the two text columns (axis = 1 means columns) ...
df = df.drop(['color', 'type'], axis = 1)
# ... and append one indicator for each: color = 1 for black, type = 1 for cat.
df = pd.concat(
    [df, oneHotColor['black'].rename('color'), oneHotType['cat'].rename('type')],
    axis = 1
)
print(df)

        age     weight  color  type
0  1.150546  39.495002      1     1
1  6.090665  80.204712      1     1
2  1.333910  25.442113      1     1
3  2.405896   5.688494      1     1
4  3.271391  86.664864      1     1
5  8.591375  22.102900      0     0
6  6.660902  40.498945      0     0
7  5.411622  31.609647      0     0
8  0.290138   7.666270      0     0
9  7.337483  84.322469      0     0

As shown above, cat is encoded as 1 and dog is encoded as 0.

(3) Make a training set

# Build the training set: feature matrix X and label column y
featureColumns = ['color', 'age', 'weight']
X = df[featureColumns].to_numpy()
y = df[['type']].to_numpy()

(4) Build a neural network

1) Build a neural network normally

print(X.shape)
print(y.shape)
# Build the neural network layer by layer.
# `tf.keras` is used throughout because a bare `keras` name is not imported
# at this point of the walkthrough.
model = tf.keras.Sequential()
# Input layer, weights of shape [3, 50]
model.add(tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'))
# Hidden layer, weights of shape [50, 25]
model.add(tf.keras.layers.Dense(25, activation = 'relu'))
# Output layer, weights of shape [25, 1]
model.add(tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid'))

# Show a summary of the model
model.summary()

(10, 3)
(10, 1)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_1 (Dense)             (None, 50)                200       
                                                                 
 dense_2 (Dense)             (None, 25)                1275      
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
=================================================================
Total params: 1,501
Trainable params: 1,501
Non-trainable params: 0
_________________________________________________________________

A Dense layer is a fully connected layer: every neuron in one layer is connected by a weight to every neuron in the next layer.

2) Build a neural network by passing a list of layers

print(X.shape)
print(y.shape)
# Build the neural network by passing all layers at once.
# `tf.keras` is used throughout because a bare `keras` name is not imported
# at this point of the walkthrough.
model = tf.keras.Sequential([
    # Input layer, weights of shape [3, 50]
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    # Hidden layer, weights of shape [50, 25]
    tf.keras.layers.Dense(25, activation = 'relu'),
    # Output layer, weights of shape [25, 1]
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])

# Show a summary of the model
model.summary()

(10, 3)
(10, 1)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_1 (Dense)             (None, 50)                200       
                                                                 
 dense_2 (Dense)             (None, 25)                1275      
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
=================================================================
Total params: 1,501
Trainable params: 1,501
Non-trainable params: 0
_________________________________________________________________

(5) Configure the loss function and optimizer and train

# Configure the loss function and the optimizer, then train for 500 epochs
compileOptions = {'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
model.compile(**compileOptions)
model.fit(X, y, epochs = 500)

......
Epoch 489/500
1/1 [==============================] - 0s 0s/step - loss: 0.3395
Epoch 490/500
1/1 [==============================] - 0s 0s/step - loss: 0.3512
Epoch 491/500
1/1 [==============================] - 0s 10ms/step - loss: 0.3420
Epoch 492/500
1/1 [==============================] - 0s 0s/step - loss: 0.3536
Epoch 493/500
1/1 [==============================] - 0s 0s/step - loss: 0.3433
Epoch 494/500
1/1 [==============================] - 0s 0s/step - loss: 0.3570
Epoch 495/500
1/1 [==============================] - 0s 0s/step - loss: 0.3676
Epoch 496/500
1/1 [==============================] - 0s 0s/step - loss: 0.3716
Epoch 497/500
1/1 [==============================] - 0s 0s/step - loss: 0.3418
Epoch 498/500
1/1 [==============================] - 0s 0s/step - loss: 0.3489
Epoch 499/500
1/1 [==============================] - 0s 0s/step - loss: 0.3333
Epoch 500/500
1/1 [==============================] - 0s 10ms/step - loss: 0.3408

`binary_crossentropy` is the binary cross-entropy loss, used for binary classification

`SGD` is the stochastic gradient descent optimizer

(6) test

# Test on one new sample
# Every column must have the same length, so `color` holds exactly one value
# to match the single random age and weight.
xTest = pd.DataFrame({
    'color': ['black'],
    'age': np.random.rand(1) * 10,
    'weight': np.random.rand(1) * 100
})

# dtype = int keeps the indicator numeric (1/0) instead of boolean.
oneHotColor = pd.get_dummies(xTest['color'], dtype = int)
# Drop the text color column (axis = 1 means columns)
xTest = xTest.drop('color', axis = 1)
xTest = pd.concat([xTest, oneHotColor['black']], axis = 1)
# Rename the indicator column
xTest.rename(columns = {'black': 'color'}, inplace = True)
print(xTest)
# The frame is now ordered age, weight, color, but the model was trained on
# color, age, weight - select the columns explicitly in the training order.
print(model.predict(xTest[['color', 'age', 'weight']].values))

        age     weight  color
0  3.853769  95.448813      1
1/1 [==============================] - 0s 70ms/step
[[1.8637778e-09]]

The value computed here is close to 0, so this sample is predicted to be a dog

(7) Complete process

import pandas as pd
import numpy as np
import tensorflow as tf


# Fixed seed so the random columns are reproducible.
np.random.seed(43)
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'type': ['cat'] * 5 + ['dog'] * 5,
})
print(df[0:10])
# %%

# One-hot encoding
# dtype = int keeps the indicator columns numeric (1/0). Recent pandas
# releases return booleans by default, which would turn X below into an
# object array.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
oneHotType = pd.get_dummies(df['type'], dtype = int)
# Drop the color column (axis = 1 means columns)
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
# Rename the indicator column
df.rename(columns = {'black': 'color'}, inplace = True)
# Drop the type column (axis = 1 means columns)
df = df.drop('type', axis = 1)
df = pd.concat([df, oneHotType['cat']], axis = 1)
df.rename(columns = {'cat': 'type'}, inplace = True)
print(df)

# %%

# Build the training set
featureColumns = ['color', 'age', 'weight']
X = df[featureColumns].values
y = df[['type']].values
print(X)
print(y)

# %%
from tensorflow import keras


print(X.shape)
print(y.shape)
# Build the neural network layer by layer
model = tf.keras.Sequential()
# Input layer
model.add(keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'))
# Hidden layer
model.add(keras.layers.Dense(25, activation = 'relu'))
# Output layer
model.add(keras.layers.Dense(y.shape[1], activation = 'sigmoid'))

# Show a summary of the model
model.summary()

# %%

print(X.shape)
print(y.shape)
# Build the same network by passing all layers at once
# (this replaces the model built in the previous cell)
model = tf.keras.Sequential([
    # Input layer, weights of shape [3, 50]
    keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    # Hidden layer, weights of shape [50, 25]
    keras.layers.Dense(25, activation = 'relu'),
    # Output layer, weights of shape [25, 1]
    keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])

# Show a summary of the model
model.summary()

# %%
# Configure the loss function and the optimizer, then train
model.compile(
    loss = 'binary_crossentropy', optimizer = 'SGD'
)
model.fit(X, y, epochs = 500)


# %%
# Test on one new sample
xTest = pd.DataFrame({
    'color': ['black'],
    'age': np.random.rand(1) * 10,
    'weight': np.random.rand(1) * 100
})

oneHotColor = pd.get_dummies(xTest['color'], dtype = int)
# Drop the color column (axis = 1 means columns)
xTest = xTest.drop('color', axis = 1)
xTest = pd.concat([xTest, oneHotColor['black']], axis = 1)
# Rename the indicator column
xTest.rename(columns = {'black': 'color'}, inplace = True)
print(xTest)
# xTest is ordered age, weight, color; select the columns in the order the
# model was trained on before predicting.
print(model.predict(xTest[featureColumns].values))

3. Building a Regression Neural Network

import tensorflow as tf
import numpy as np
import pandas as pd


# Synthetic data: predict sleep_time from color, age and weight.
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'sleep_time': np.random.rand(10) * 24
})

# One-hot encode the color; dtype = int keeps the indicator numeric (1/0)
# instead of boolean so the feature matrix stays numeric.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
print(oneHotColor)
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df.rename(columns = {'black': 'color'}, inplace = True)

print(df)

X = df[['color', 'age', 'weight']].values
y = df['sleep_time'].values

# Regression network: the output layer has one neuron and no activation.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(1)
])
model.summary()
# Use the stable optimizer path; the `experimental` namespace is only
# available in some TensorFlow releases.
model.compile(loss = tf.keras.losses.MeanSquaredError(),
              optimizer = tf.keras.optimizers.SGD())
model.fit(X, y, epochs = 500)

# Predict every row in a single call instead of one predict() call per row.
yPredict = model.predict(X).ravel().tolist()
print(yPredict)

import matplotlib.pyplot as plt

# Plot the true sleep times against the predictions, sample by sample.
sampleIndex = range(len(df))
plt.plot(sampleIndex, df['sleep_time'].values)
plt.plot(sampleIndex, yPredict)
plt.show()

4. Use the class method to build a neural network

The `__init__()` function defines the layers that are used, and the `call()` function defines the forward propagation of the neural network

Take the regression neural network as an example

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


class MyModel(tf.keras.Model):
    """Three-layer fully connected regression network (50 -> 25 -> 1)."""

    def __init__(self, X = None, y = None):
        """Create the layers.

        X and y are accepted only for backward compatibility with existing
        `MyModel(X, y)` calls; they are not used. In a subclassed model the
        layers create their weights from the inputs of the first call, so no
        input size has to be declared here.
        """
        super().__init__()
        self.d1 = tf.keras.layers.Dense(50, activation = 'relu')
        self.d2 = tf.keras.layers.Dense(25, activation = 'relu')
        # Single linear output neuron for regression
        self.d3 = tf.keras.layers.Dense(1)

    def call(self, X):
        """Forward pass: feed X through the three Dense layers in order."""
        x = self.d1(X)
        x = self.d2(x)
        return self.d3(x)


# Synthetic data: predict sleep_time from color, age and weight.
df = pd.DataFrame({
    'color': ['black'] * 5 + ['white'] * 5,
    'age': np.random.rand(10) * 10,
    'weight': np.random.rand(10) * 100,
    'sleep_time': np.random.rand(10) * 24
})

# One-hot encode the color; dtype = int keeps the indicator numeric (1/0)
# instead of boolean so the feature matrix stays numeric.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
print(oneHotColor)
df = df.drop('color', axis = 1)
df = pd.concat([df, oneHotColor['black']], axis = 1)
df.rename(columns = {'black': 'color'}, inplace = True)

print(df)

X = df[['color', 'age', 'weight']].values
y = df['sleep_time'].values

model = MyModel(X, y)
# Use the stable optimizer path; the `experimental` namespace is only
# available in some TensorFlow releases.
model.compile(loss = tf.keras.losses.MeanSquaredError(),
              optimizer = tf.keras.optimizers.Adam())

model.fit(X, y, epochs = 500)

# Predict every row in a single call instead of one predict() call per row.
yPredict = model.predict(X).ravel().tolist()
print(yPredict)

# Plot the true sleep times against the predictions, sample by sample.
sampleIndex = range(len(df))
plt.plot(sampleIndex, df['sleep_time'].values)
plt.plot(sampleIndex, yPredict)
plt.show()

5. Model training, saving and loading

(1) Dropout layer

Dropout refers to temporarily discarding some neurons with a given probability while a deep neural network is being trained. The discarded neurons do not take part in the current training step, which reduces the number of network parameters in use at one time. Using Dropout can alleviate overfitting.

Add the Dropout layer to the built network

# Same network as before, with a Dropout layer after the first Dense layer
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(25, activation = 'relu'))
model.add(tf.keras.layers.Dense(1))

Here 0.2 represents a ratio, that is, to deactivate 20% of neurons

(2) EarlyStopping

EarlyStopping means stopping training early, and it is usually regarded as a regularization method that can keep the network from overfitting

Principle: Stop the network model before it is overfitted, and maintain the weight parameters of the model to the current best state

Use the callbacks module in TensorFlow to create an EarlyStopping callback directly, and use it together with a validation set

import tensorflow as tf
import pandas as pd
import numpy as np

# Fixed seed so the data and the train/validation split are reproducible.
np.random.seed(43)
df = pd.DataFrame({
    'color': ['black'] * 10 + ['white'] * 10,
    'age': np.random.rand(20) * 10,
    'weight': np.random.rand(20) * 100,
    'type': ['cat'] * 10 + ['dog'] * 10,
})

# dtype = int keeps the indicator columns numeric (1/0) instead of boolean.
oneHotColor = pd.get_dummies(df['color'], dtype = int)
oneHotType = pd.get_dummies(df['type'], dtype = int)

df = df.drop(['color', 'type'], axis = 1)
df = pd.concat([df, oneHotColor['black'], oneHotType['cat']], axis = 1)
df.rename(columns = {
    'black': 'color',
    'cat': 'type'
}, inplace = True)

X = df[['color', 'age', 'weight']].values
y = df[['type']].values

# The rows are ordered cats first, dogs last, so slicing off the tail would
# leave a validation set made of dogs only. Shuffle the row indices first,
# then use 15 rows for training and the remaining 5 for validation.
shuffled = np.random.permutation(len(X))
trainIdx, validIdx = shuffled[:15], shuffled[15:]
xTrain = X[trainIdx]
yTrain = y[trainIdx]
xValid = X[validIdx]
yValid = y[validIdx]

# Stop once val_loss has not improved for 4 consecutive epochs and roll the
# model back to the weights of its best epoch.
callback = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss', patience = 4, restore_best_weights = True
)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])

model.summary()
# Use the stable optimizer path; the `experimental` namespace is only
# available in some TensorFlow releases.
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.Adam())
# Train on the training split only, so that val_loss is measured on rows the
# model has never been fitted on.
model.fit(xTrain, yTrain, epochs = 500, validation_data = (xValid, yValid), callbacks = [callback])

......
Epoch 22/500
1/1 [==============================] - 0s 22ms/step - loss: 2.4001 - val_loss: 0.1362
Epoch 23/500
1/1 [==============================] - 0s 21ms/step - loss: 2.5386 - val_loss: 0.1242
Epoch 24/500
1/1 [==============================] - 0s 20ms/step - loss: 1.5711 - val_loss: 0.1143
Epoch 25/500
1/1 [==============================] - 0s 20ms/step - loss: 2.0007 - val_loss: 0.1113
Epoch 26/500
1/1 [==============================] - 0s 20ms/step - loss: 1.5747 - val_loss: 0.1105
Epoch 27/500
1/1 [==============================] - 0s 22ms/step - loss: 2.8826 - val_loss: 0.1147
Epoch 28/500
1/1 [==============================] - 0s 23ms/step - loss: 1.8719 - val_loss: 0.1209
Epoch 29/500
1/1 [==============================] - 0s 24ms/step - loss: 2.5716 - val_loss: 0.1323
Epoch 30/500
1/1 [==============================] - 0s 26ms/step - loss: 2.2206 - val_loss: 0.1505

The results show that training stopped after 30 epochs

Looking at val_loss, it reached its minimum at epoch 26 and then rose for 4 consecutive epochs (the patience value), which indicates that the model is starting to overfit, so EarlyStopping ended the training

(3) Save the model

There are two ways to save a model: as checkpoints written by the ModelCheckpoint callback, or as an h5 file

(1) checkpoints

Use the callback function checkpoint to set related parameters

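tf.keras.callbacks.ModelCheckpoint(
    filepath,                           # path to save to
    monitor: str = 'val_loss',          # quantity to monitor
    verbose: int = 0,                   # verbosity: 0 = silent, 1 = detailed
    save_best_only: bool = False,       # whether to keep only the best model parameters
    save_weights_only: bool = False,    # whether to save only the weights; False saves the whole model
)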

# Stop once val_loss has not improved for 4 consecutive epochs.
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 4)
# Write the weights to disk whenever the monitored val_loss improves.
# NOTE(review): Keras 3 is documented to require weight-only checkpoint
# paths to end in `.weights.h5`; this path assumes the TF2 checkpoint
# format - confirm against the installed version.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = 'training_model/cp.ckpt',
    monitor = 'val_loss',
    save_best_only = True,
    save_weights_only = True,
    verbose = 1
)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])

model.summary()
# Use the stable optimizer path; the `experimental` namespace is only
# available in some TensorFlow releases.
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.Adam())
# Train on the training split only, so that the val_loss driving both
# callbacks is measured on rows the model has not been fitted on.
model.fit(xTrain, yTrain, epochs = 500, validation_data = (xValid, yValid), callbacks = [earlyStopping, checkpoint])

Use `load_weights()` to read the saved model parameters

# Load the saved model weights.
# Only the weights were saved, so a network with the same architecture has
# to be rebuilt before they can be loaded into it.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(50, input_dim = X.shape[1], activation = 'relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(25, activation = 'relu'),
    tf.keras.layers.Dense(y.shape[1], activation = 'sigmoid')
])

model.summary()
# Use the stable optimizer path; the `experimental` namespace is only
# available in some TensorFlow releases.
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.Adam())

model.load_weights('./training_model/cp.ckpt')

(2) h5 file

Save

# Save the model to an HDF5 (.h5) file
model.save('./training_model/mymodel.h5')

Load

# Load the model back from the HDF5 (.h5) file
model = tf.keras.models.load_model('./training_model/mymodel.h5')

Note: the HDF5 format does not save the weights of an `optimizer_experimental.Optimizer`