Tensorflow Example: Using LSTM to Predict the Daily Highest Stock Price (2)

Reprinted from: Scorpio_Lu

Based on factors in a stock's historical data, such as the lowest price, highest price, opening price, closing price, trading volume, turnover, and price change, we predict the stock's highest price on the following day.

The data used in the experiment looks like this:
(figure: a sample of the dataset)

The label column is the target y, i.e., the next day's highest price. Columns C through I are the input features.
This example uses the first 5,800 rows as training data.

For an introduction to single-factor input features, RNNs, and LSTM, see Tensorflow Example: Using LSTM to Predict the Daily Highest Stock Price (1).


Importing packages and declaring constants

import pandas as pd
import numpy as np
import tensorflow as tf

#define constants
rnn_unit=10       #hidden layer units
input_size=7      #number of input features
output_size=1     #dimension of the output (next day's highest price)
lr=0.0006         #learning rate

Import Data

f=open('dataset.csv')
df=pd.read_csv(f) #Read in stock data
data=df.iloc[:,2:10].values  #take columns 3-10

Generate training set and test set

To keep the setup close to a realistic training scenario, the number of samples per batch (batch_size), the time step (time_step), and the range of the training set (train_begin, train_end) are exposed as parameters, which makes training more flexible.

#—————————Get training set —————————
def get_train_data(batch_size=60,time_step=20,train_begin=0,train_end=5800):
    batch_index=[]
    data_train=data[train_begin:train_end]
    normalized_train_data=(data_train-np.mean(data_train,axis=0))/np.std(data_train,axis=0)  #standardize (zero mean, unit variance)
    train_x,train_y=[],[] #initial definition of training set x and y
    for i in range(len(normalized_train_data)-time_step):
       if i % batch_size==0:
           batch_index.append(i)
       x=normalized_train_data[i:i+time_step,:7]   #columns 0-6: the 7 input features
       y=normalized_train_data[i:i+time_step,7,np.newaxis]   #column 7: the label (next day's highest price)
       train_x.append(x.tolist())
       train_y.append(y.tolist())
    batch_index.append((len(normalized_train_data)-time_step))
    return batch_index,train_x,train_y

#—————————Get test set —————————
def get_test_data(time_step=20,test_begin=5800):
    data_test=data[test_begin:]
    mean=np.mean(data_test,axis=0)
    std=np.std(data_test,axis=0)
    normalized_test_data=(data_test-mean)/std  #standardize with the test set's own mean and std
    size=(len(normalized_test_data)+time_step-1)//time_step  #number of samples
    test_x,test_y=[],[]  
    for i in range(size-1):
       x=normalized_test_data[i*time_step:(i+1)*time_step,:7]
       y=normalized_test_data[i*time_step:(i+1)*time_step,7]
       test_x.append(x.tolist())
       test_y.extend(y)
    test_x.append((normalized_test_data[(i+1)*time_step:,:7]).tolist())
    test_y.extend((normalized_test_data[(i+1)*time_step:,7]).tolist())
    return mean,std,test_x,test_y
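
As a quick sanity check, the window and label shapes produced by get_train_data can be inspected as follows (a sketch assuming dataset.csv has already been loaded as above):

batch_index,train_x,train_y=get_train_data(batch_size=60,time_step=20,train_begin=0,train_end=5800)
print(len(train_x))                 #number of sliding windows: train_end-train_begin-time_step
print(np.array(train_x[0]).shape)   #(20, 7): one window of 20 time steps, 7 features
print(np.array(train_y[0]).shape)   #(20, 1): the label column over the same window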

Build Neural Networks

#——————————————————Define neural network variables ——————————————————
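#Input/output layer weights and biases. lstm() below references these
#dictionaries, but their definition is not shown in this excerpt; the
#following is a minimal sketch consistent with input_size, rnn_unit and
#output_size above (an assumption, not necessarily the author's exact
#initialization).
weights={
         'in':tf.Variable(tf.random_normal([input_size,rnn_unit])),
         'out':tf.Variable(tf.random_normal([rnn_unit,1]))
        }
biases={
        'in':tf.Variable(tf.constant(0.1,shape=[rnn_unit,])),
        'out':tf.Variable(tf.constant(0.1,shape=[1,]))
       }
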
def lstm(X):     
    batch_size=tf.shape(X)[0]
    time_step=tf.shape(X)[1]
    w_in=weights['in']
    b_in=biases['in']  
    input=tf.reshape(X,[-1,input_size]) #The tensor needs to be converted into 2 dimensions for calculation, and the calculated result is used as the input of the hidden layer
    input_rnn=tf.matmul(input,w_in)+b_in
    input_rnn=tf.reshape(input_rnn,[-1,time_step,rnn_unit]) #Convert the tensor to 3 dimensions as the input of the lstm cell
    cell=tf.nn.rnn_cell.BasicLSTMCell(rnn_unit)
    init_state=cell.zero_state(batch_size,dtype=tf.float32)
    output_rnn,final_states=tf.nn.dynamic_rnn(cell, input_rnn,initial_state=init_state, dtype=tf.float32) #output_rnn holds the output of every time step; final_states is the state of the last cell
    output=tf.reshape(output_rnn,[-1,rnn_unit]) #As the input of the output layer
    w_out=weights['out']
    b_out=biases['out']
    pred=tf.matmul(output,w_out)+b_out
    return pred,final_states

Train the model

#—————————————————— Training model ——————————————————
def train_lstm(batch_size=80,time_step=15,train_begin=0,train_end=5800):
    X=tf.placeholder(tf.float32, shape=[None,time_step,input_size])
    Y=tf.placeholder(tf.float32, shape=[None,time_step,output_size])
    batch_index,train_x,train_y=get_train_data(batch_size,time_step,train_begin,train_end)
    pred,_=lstm(X)
    #loss function
    loss=tf.reduce_mean(tf.square(tf.reshape(pred,[-1])-tf.reshape(Y, [-1])))
    train_op=tf.train.AdamOptimizer(lr).minimize(loss)
    saver=tf.train.Saver(tf.global_variables(),max_to_keep=15)
    module_file = tf.train.latest_checkpoint('.')    #directory of the saved checkpoints (current directory here)
    with tf.Session() as sess:
        #sess.run(tf.global_variables_initializer())
        saver.restore(sess, module_file)
        #Repeat training 2000 times
        for i in range(2000):
            for step in range(len(batch_index)-1):
                _,loss_=sess.run([train_op,loss],feed_dict={X:train_x[batch_index[step]:batch_index[step+1]],Y:train_y[batch_index[step]:batch_index[step+1]]})
            print(i,loss_)
            if i % 200==0:
                print("保存模型:",saver.save(sess,'stock2.model',global_step=i))

To clarify: the code above restores parameters from an existing model. That is, the network has already been trained and its parameters saved, and they are loaded here as the initialization before training continues. If you are training for the first time, use sess.run(tf.global_variables_initializer()) instead, and do not use module_file = tf.train.latest_checkpoint(...) and saver.restore(sess, module_file).
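
For reference, here is a minimal sketch of the first-training variant just described: the restore lines are dropped and the variables are initialized from scratch (this simply replaces the session block inside train_lstm()):

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  #fresh initialization instead of restoring a checkpoint
        for i in range(2000):
            for step in range(len(batch_index)-1):
                _,loss_=sess.run([train_op,loss],feed_dict={X:train_x[batch_index[step]:batch_index[step+1]],Y:train_y[batch_index[step]:batch_index[step+1]]})
            print(i,loss_)
            if i % 200==0:
                print("model saved:",saver.save(sess,'stock2.model',global_step=i))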

Test the model

#—————————————————Prediction Model —————————————————————
def prediction(time_step=20):
    X=tf.placeholder(tf.float32, shape=[None,time_step,input_size])
    mean,std,test_x,test_y=get_test_data(time_step)
    pred,_=lstm(X)     
    saver=tf.train.Saver(tf.global_variables())
    with tf.Session() as sess:
        #Parameter recovery
        module_file = tf.train.latest_checkpoint('.')
        saver.restore(sess, module_file)
        test_predict=[]
        for step in range(len(test_x)-1):
          prob=sess.run(pred,feed_dict={X:[test_x[step]]})   
          predict=prob.reshape((-1))
          test_predict.extend(predict)
        test_y=np.array(test_y)*std[7]+mean[7]
        test_predict=np.array(test_predict)*std[7]+mean[7]
        acc=np.average(np.abs(test_predict-test_y[:len(test_predict)])/test_y[:len(test_predict)])  #acc: mean relative error on the test set
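
The original post then plots the predicted curve against the true values; a minimal matplotlib sketch of that step (an assumption about the exact plotting code, appended inside prediction() after acc is computed) might look like this:

        #plot the true values (red) against the predicted values (blue)
        import matplotlib.pyplot as plt
        plt.figure()
        plt.plot(list(range(len(test_y))),test_y,color='r')
        plt.plot(list(range(len(test_predict))),test_predict,color='b')
        plt.show()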

The final result is drawn like this:

The red line is the true value, and the blue line is the predicted value

The average deviation (acc) is about 1.36%.

The code and data have been uploaded to GitHub for anyone who wants the complete code.

Note: if you want to reprint this article, please get my permission first and credit the source!
