#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/3/1 16:41
# @Author : sparkle_code_guy
import pandas as pd
import tensorflow as tf
from keras.losses import MSE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# Number of consecutive past observations fed to the model per sample.
days = 30

# Load the price history; column index 4 is kept as a one-column frame
# (the original author's note labels it the Close price).
df = pd.read_csv('../dataset/GOOG-year.csv')

# Normalization: fit the min-max scaler on that column, then scale the same
# column and keep the result as a plain nested list ([[v0], [v1], ...]).
minmax = MinMaxScaler().fit(df.iloc[:, 4:5].astype('float32'))
df_log = minmax.transform(df.iloc[:, 4:5].astype('float32')).tolist()
def simcse_generater():
    """Build the batched training and validation datasets from ``df_log``.

    Every sample is a sliding window of ``days`` consecutive scaled values
    taken from the module-level ``df_log``; its label is the value that
    immediately follows the window.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple of ``tf.data.Dataset``
        objects. Each element is ``({"stock_price30days": window}, label)``
        and both datasets are batched with a batch size of 100.
    """

    def map_example_to_dict(input_ids, label):
        # The key matches the name given to the model's Input layer.
        return {"stock_price30days": input_ids}, label

    def gen_train_data(ds):
        # Window ds[i - days:i] is paired with the next value ds[i].
        positions = range(days, len(ds))
        input_ids_list = [ds[i - days:i] for i in positions]
        label_list = [ds[i] for i in positions]
        return input_ids_list, label_list

    def to_dataset(features, labels):
        return tf.data.Dataset.from_tensor_slices(
            (features, labels)).map(map_example_to_dict).batch(batch_size)

    batch_size = 100
    x_data, y_data = gen_train_data(df_log)

    # Split windows and labels in ONE call. Splitting them in two separate
    # calls shuffles each list independently, which pairs every window with
    # an unrelated label and makes the training targets meaningless.
    x_train, x_test, y_train, y_test = train_test_split(
        x_data, y_data, train_size=0.8)

    return to_dataset(x_train, y_train), to_dataset(x_test, y_test)
# Number of stacked LSTM layers (values below 1 still build a single layer).
lstm_layer_num = 1

# Variable-length sequences with one feature per time step. The layer name
# must equal the feature key emitted by simcse_generater().
input_tensor = tf.keras.layers.Input(shape=(None, 1), name="stock_price30days", dtype=tf.float64)

# Stack the LSTM layers. Every layer except the last one has to return the
# full sequence, otherwise the following LSTM receives 2-D input and fails
# to build; only the final layer collapses the sequence to one vector.
first_forward = input_tensor
for layers_after in range(max(lstm_layer_num, 1) - 1, -1, -1):
    first_forward = tf.keras.layers.LSTM(
        64, dropout=0.3, recurrent_dropout=0.3,
        go_backwards=False, return_sequences=layers_after > 0,
        return_state=False)(first_forward)

# Single sigmoid unit: the targets were min-max scaled with the scaler's
# default settings, so the prediction is produced in the same scaled space.
dense1 = tf.keras.layers.Dense(1, activation='sigmoid')(first_forward)
my_model = tf.keras.Model(inputs=input_tensor, outputs=dense1)

# Smoke test: run the untrained model on the first window of the series.
print(my_model(tf.constant([df_log[:days]])))

my_model.compile(optimizer="adam", loss=tf.keras.losses.MSE)

callbacks = [
    # Save a checkpoint after each epoch. The format spec is '02d' without a
    # space flag; '{epoch: 02d}' rendered names with a leading blank
    # (e.g. ' 1.h5') instead of '01.h5'.
    tf.keras.callbacks.ModelCheckpoint(filepath='base_path/als_models/' + '{epoch:02d}.h5')
]

train_tf_data, test_tf_data = simcse_generater()

# The datasets are already batched, so `batch_size` must not be passed to
# fit() (depending on the Keras version it is ignored or rejected), and
# `shuffle` has no effect on tf.data.Dataset input.
my_model.fit(train_tf_data, epochs=2, verbose=1, validation_data=test_tf_data, callbacks=callbacks)
def plot_test_data_predict(my_model,minmax,test_tf_data):
    """Plot real versus predicted values for every batch of a dataset.

    Args:
        my_model: Callable applied to each feature batch to get predictions.
        minmax: Scaler object; its ``inverse_transform`` is applied to both
            the labels and the predictions before plotting.
        test_tf_data: Iterable yielding ``(features, labels)`` batches.

    The function returns nothing; it draws both curves on one figure
    (real in green, predicted in red) and shows it.
    """

    def to_original_scale(scaled_values):
        # Undo the scaling, then flatten to a plain list of scalars.
        restored = minmax.inverse_transform(scaled_values)
        return list(tf.reshape(restored, -1).numpy())

    actual_prices = []
    predicted_prices = []
    for feature_batch, label_batch in test_tf_data:
        actual_prices.extend(to_original_scale(label_batch))
        predicted_prices.extend(to_original_scale(my_model(feature_batch)))

    import matplotlib.pyplot as plt

    # x axis is simply the running sample index.
    sample_index = list(range(len(actual_prices)))
    plt.plot(sample_index, actual_prices, label='real stock price', color='g')
    plt.plot(sample_index, predicted_prices, label='predict stock price', color='r')
    plt.legend()
    plt.title('message plot')
    plt.show()
# Visualize the trained model's predictions against the held-out split.
plot_test_data_predict(
    my_model=my_model,
    minmax=minmax,
    test_tf_data=test_tf_data,
)
# This article implements the training code for an LSTM-based prediction model; feel free to copy it if you are interested.