This case is suitable for science and engineering.
Following up on the hard-core case from the previous article: Python data analysis case 24 — lithium-battery life prediction based on deep learning.
This case is similar, but it further expands the time series forecasting to multivariate situations. The time series of the previous case has only one characteristic variable of battery capacity, and now multiple variables are used to construct the neural network model.
case background
Offshore wind power is one of the most popular renewable-energy projects, so accurate power forecasting is naturally very important.
This time, we simply use some common neural networks to compare the prediction effects. (Try a small case by hand)
The dataset has many characteristics, as follows:
V is the wind speed, D is the wind direction, followed by a number of air-humidity-related features, and the last column is the electric power output.
code preparation
Similar to the previous case, there are a large number of custom functions.
First import the package,
import os
import math
import time
import datetime
import random as rn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams ['font.sans-serif'] ='SimHei' # render Chinese characters in plots
plt.rcParams ['axes.unicode_minus']=False # render the minus sign correctly
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error,r2_score
import tensorflow as tf
import keras
from keras.models import Model, Sequential
from keras.layers import GRU, Dense,Conv1D, MaxPooling1D,GlobalMaxPooling1D,Embedding,Dropout,Flatten,SimpleRNN,LSTM
from keras.callbacks import EarlyStopping
#from tensorflow.keras import regularizers
#from keras.utils.np_utils import to_categorical
from tensorflow.keras import optimizers
Read the data. Because the full dataset is very large, I only kept the first 1000 rows as a quick trial.
# Load the first 1000 rows; use the sequence number as the index and shorten
# the long power-column name to 'y' (the prediction target).
data0=pd.read_excel('5.xlsx').iloc[:1000,:].set_index('Sequence No.').rename(columns={'y (% relative to rated power)':'y'})
data0.head()
Define random number seed function and evaluation function
def set_my_seed(seed=None):
    """Seed every RNG source (hash, random, NumPy, TensorFlow) for reproducibility.

    Parameters
    ----------
    seed : int, optional
        Base seed applied to all generators. When None (the default) the
        original fixed per-library seeds are used, so existing calls
        behave exactly as before.
    """
    # NOTE(review): PYTHONHASHSEED only affects str hashing when set before
    # the interpreter starts; setting it here is harmless but likely a no-op.
    os.environ['PYTHONHASHSEED'] = '0'
    if seed is None:
        np.random.seed(1)
        rn.seed(12345)
        tf.random.set_seed(123)
    else:
        np.random.seed(seed)
        rn.seed(seed)
        tf.random.set_seed(seed)
def evaluation(y_test, y_predict):
    """Compute regression metrics between true and predicted values.

    Returns ``(mae, rmse, mape, r2)``. MAPE is reported as a fraction (not a
    percentage) and divides by ``y_test`` element-wise, so it blows up when
    the target contains zeros — callers must ensure a strictly positive target.
    """
    mae = mean_absolute_error(y_test, y_predict)
    mse = mean_squared_error(y_test, y_predict)
    # reuse mse instead of calling mean_squared_error a second time
    rmse = np.sqrt(mse)
    mape = (abs(y_predict - y_test) / y_test).mean()
    r_2 = r2_score(y_test, y_predict)
    return mae, rmse, mape, r_2
Build test and training set functions for sequence data
def build_sequences(text, window_size=24):
    """Turn a series into sliding-window samples with next-step targets.

    Sample ``i`` is ``text[i:i+window_size]`` and its target is the value
    immediately after the window, ``text[i+window_size]``. Returns the
    stacked samples and targets as NumPy arrays.
    """
    n_samples = len(text) - window_size
    windows = [text[start:start + window_size] for start in range(n_samples)]
    targets = [text[start + window_size] for start in range(n_samples)]
    return np.array(windows), np.array(targets)
def get_traintest(data, train_size=None, window_size=24):
    """Split windowed data into train and test sets.

    Parameters
    ----------
    data : array-like
        2-D array whose LAST column is the (scaled) target.
    train_size : int, optional
        Number of leading rows used for training. Defaults to the full
        length of ``data``. (The old default was ``len(data0)``, captured
        from a module global at definition time — equivalent in this file
        but fragile; the sentinel removes the hidden global dependency.)
    window_size : int
        Look-back length passed to build_sequences.

    Returns ``X_train, y_train, X_test, y_test`` where the y's are the last
    column (the target) of each window's following row.
    """
    if train_size is None:
        train_size = len(data)
    train = data[:train_size]
    # overlap the last window_size training rows so the first test window is complete
    test = data[train_size - window_size:]
    X_train, y_train = build_sequences(train, window_size=window_size)
    X_test, y_test = build_sequences(test, window_size=window_size)
    return X_train, y_train[:, -1], X_test, y_test[:, -1]
Build five model functions, as well as functions for drawing loss graphs and fitting graphs.
def build_model(X_train, mode='LSTM', hidden_dim=None):
    """Build and compile a small two-hidden-layer network of the given family.

    Parameters
    ----------
    X_train : np.ndarray
        Training tensor; only its last two dimensions (window, n_features)
        are used to size the input layer.
    mode : str
        One of 'RNN', 'MLP', 'LSTM', 'GRU', 'CNN'.
    hidden_dim : list[int], optional
        Sizes of the two hidden layers; defaults to [32, 16]. (Previously a
        mutable default argument; now created fresh per call.)

    Returns
    -------
    A compiled keras Sequential model (Adam optimizer, MSE loss, with
    RMSE/MAPE/MAE metrics).

    Raises
    ------
    ValueError
        For an unrecognized mode. (Previously an unknown mode silently
        produced a model containing only the final Dense layer.)
    """
    if hidden_dim is None:
        hidden_dim = [32, 16]
    set_my_seed()
    input_shape = (X_train.shape[-2], X_train.shape[-1])  # (window, n_features)
    model = Sequential()
    if mode == 'RNN':
        model.add(SimpleRNN(hidden_dim[0], return_sequences=True, input_shape=input_shape))
        model.add(SimpleRNN(hidden_dim[1]))
    elif mode == 'MLP':
        model.add(Dense(hidden_dim[0], activation='relu', input_shape=input_shape))
        model.add(Flatten())
        model.add(Dense(hidden_dim[1], activation='relu'))
    elif mode == 'LSTM':
        model.add(LSTM(hidden_dim[0], return_sequences=True, input_shape=input_shape))
        model.add(LSTM(hidden_dim[1]))
    elif mode == 'GRU':
        model.add(GRU(hidden_dim[0], return_sequences=True, input_shape=input_shape))
        model.add(GRU(hidden_dim[1]))
    elif mode == 'CNN':
        # 1-D causal convolutions; the second layer is dilated to widen the
        # receptive field without pooling.
        model.add(Conv1D(hidden_dim[0], kernel_size=3, padding='causal', strides=1,
                         activation='relu', dilation_rate=1, input_shape=input_shape))
        model.add(Conv1D(hidden_dim[1], kernel_size=3, padding='causal', strides=1,
                         activation='relu', dilation_rate=2))
    else:
        raise ValueError(f"unknown mode: {mode!r}; expected RNN, MLP, LSTM, GRU or CNN")
    # Flatten is a no-op for the 2-D recurrent outputs and collapses the
    # CNN's (window, channels) output before the single-unit regressor.
    model.add(Flatten())
    model.add(Dense(1))
    model.compile(optimizer='Adam', loss='mse',
                  metrics=[tf.keras.metrics.RootMeanSquaredError(), "mape", "mae"])
    return model
def plot_loss(hist, imfname=''):
    """Plot each metric recorded in a keras History object side by side.

    Generalized: the original hard-coded a 1x4 grid via
    ``int(str('14')+str(i+1))``, which breaks (or leaves blank panels) when
    the history holds a different number of metrics; the grid width now
    follows ``len(hist.history)``.
    """
    keys = list(hist.history.keys())
    n_cols = len(keys)
    plt.subplots(1, n_cols, figsize=(16, 2))
    for i, key in enumerate(keys):
        plt.subplot(1, n_cols, i + 1)
        plt.plot(hist.history[key], 'k', label=f'Training {key}')
        plt.title(f'{imfname} Training {key}')
        plt.xlabel('Epochs')
        plt.ylabel(key)
        plt.legend()
    plt.tight_layout()
    plt.show()
def plot_fit(y_test, y_pred):
    """Overlay the predicted series on the actual one for a quick visual check."""
    plt.figure(figsize=(4, 2))
    for series, colour, tag in ((y_test, "red", "actual"), (y_pred, "blue", "predict")):
        plt.plot(series, color=colour, label=tag)
    plt.title(f"拟合值和真实值对比")
    plt.xlabel("Time")
    plt.ylabel('power')
    plt.legend()
    plt.show()
Define the training function and prepare two data frames, one for evaluation indicators and one for prediction results.
# Shared accumulators written by train_fuc: one row of metrics and one column
# of predictions per model type.
df_eval_all=pd.DataFrame(columns=['MAE','RMSE','MAPE','R2'])  # evaluation metrics per model
df_preds_all=pd.DataFrame()  # predicted series per model
def train_fuc(mode='LSTM',window_size=64,batch_size=32,epochs=50,hidden_dim=[32,16],train_ratio=0.8,show_loss=True,show_fit=True):
    """Train one network family on data0, plot diagnostics, and record results.

    Parameters: mode — network family forwarded to build_model; window_size —
    look-back length; batch_size/epochs — keras fit settings; hidden_dim —
    hidden layer sizes; train_ratio — fraction of rows used for training;
    show_loss/show_fit — toggle the diagnostic plots.

    Side effects: prints shapes, timing and metrics, shows plots, and writes
    into the module-level df_preds_all and df_eval_all frames.
    """
    # prepare the data (reads the module-level data0 frame)
    data=data0.to_numpy()
    # min-max scaling: features and target scaled separately
    # NOTE(review): both scalers are fit on the FULL series before the
    # train/test split, so test-set statistics leak into the scaling —
    # confirm this is acceptable for the intended comparison.
    scaler = MinMaxScaler()
    scaler = scaler.fit(data[:,:-1])
    X=scaler.transform(data[:,:-1])
    y_scaler = MinMaxScaler()
    y_scaler = y_scaler.fit(data[:,-1].reshape(-1,1))
    y=y_scaler.transform(data[:,-1].reshape(-1,1))
    train_size=int(len(data)*train_ratio)
    X_train,y_train,X_test,y_test=get_traintest(np.c_[X,y],window_size=window_size,train_size=train_size)
    print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)
    # build and fit the model (early stop after 5 epochs with no loss improvement)
    s = time.time()
    set_my_seed()
    model=build_model(X_train=X_train,mode=mode,hidden_dim=hidden_dim)
    earlystop = EarlyStopping(monitor='loss', min_delta=0, patience=5)
    hist=model.fit(X_train, y_train,batch_size=batch_size,epochs=epochs,callbacks=[earlystop],verbose=0)
    if show_loss:
        plot_loss(hist)
    # predict, then invert the y scaling back to original power units
    y_pred = model.predict(X_test)
    y_pred = y_scaler.inverse_transform(y_pred)
    y_test = y_scaler.inverse_transform(y_test.reshape(-1,1))
    #print(f'真实y的形状:{y_test.shape},预测y的形状:{y_pred.shape}')
    if show_fit:
        plot_fit(y_test, y_pred)
    e=time.time()
    print(f"运行时间为{round(e-s,3)}")
    # store predictions and metrics in the shared module-level frames
    df_preds_all[mode]=y_pred.reshape(-1,)
    s=list(evaluation(y_test, y_pred))
    df_eval_all.loc[f'{mode}',:]=s
    s=[round(i,3) for i in s]
    print(f'{mode}的预测效果为:MAE:{s[0]},RMSE:{s[1]},MAPE:{s[2]},R2:{s[3]}')
    print("=======================================运行结束==========================================")
Initialization parameters:
# Shared default hyper-parameters for the runs below.
window_size=64
batch_size=32
epochs=50
hidden_dim=[32,16]
train_ratio=0.8
show_fit=True
show_loss=True
mode='LSTM' # other options: RNN, GRU, CNN, MLP
Neural Networks
So many custom functions are encapsulated above for the convenience of the following training,
The input parameters can be trained and then evaluated.
LSTM network
# LSTM with all defaults (window_size=64, hidden_dim=[32,16])
train_fuc(mode='LSTM')
You can see the loss and fit plots printed clearly, followed by the calculation of the error metrics.
If you want to change the parameters, just change them directly in the training function. For example, if you want to make the sliding window smaller, change it to 16, and change the number of neurons a little more, change it to 128 and 32, just write it like this:
# same LSTM with a smaller window and larger hidden layers
train_fuc(mode='LSTM',window_size=16,hidden_dim=[128,32])
It can be seen that the goodness of fit has increased a bit.
Can continue to adjust.
RNN prediction
Just modify the mode parameter.
# Simple RNN run with the shared defaults
mode='RNN'
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=32,epochs=epochs,hidden_dim=hidden_dim)
GRU forecast
# GRU run with the shared defaults
mode='GRU'
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=batch_size,epochs=epochs,hidden_dim=hidden_dim)
The effect is very good!
One-dimensional CNN
# 1-D CNN run with the shared defaults
mode='CNN'
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=batch_size,epochs=epochs,hidden_dim=hidden_dim)
MLP
# MLP run; note epochs is overridden to 60 here
mode='MLP'
set_my_seed()
train_fuc(mode=mode,window_size=window_size,batch_size=batch_size,epochs=60,hidden_dim=hidden_dim)
Evaluation index
Check:
# metric table: one row per model (MAE, RMSE, MAPE, R2)
df_eval_all
It can be seen that the models predict fairly well, with R2 above 0.9. GRU and the other recurrent neural networks work very well; the LSTM, after parameter tuning, performs best.
forecast result:
# prediction table: one column of forecasts per model
df_preds_all
Then you can use this to draw pictures or something: