Deep Learning: Forecasting the Beijing PM2.5 Dataset with a Two-Layer Bidirectional Attention LSTM in TensorFlow

What you will learn from predicting the Beijing PM2.5 dataset:

Traditional linear models struggle with multivariate, multi-input problems, whereas neural networks such as the LSTM are good at handling multiple input variables, a property that makes them well suited to time-series forecasting.

  In this post you will learn how to build an LSTM model with the deep-learning framework TensorFlow (TF) to tackle multivariate time-series forecasting.
  By the end you will know:
  1. how to convert raw data into a format suitable for time-series prediction (see the sketch after this list);
  2. how to prepare the data and build a two-layer bidirectional LSTM with attention for time-series forecasting;
  3. how to use the model for prediction.
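
As a preview of point 1, here is a minimal, self-contained sketch of turning a time-ordered table into fixed-length supervised windows. It is illustrative only, not the post's code; the helper name make_windows and the window length are assumptions:

import numpy as np
import pandas as pd

def make_windows(df: pd.DataFrame, feature_cols, target_col, window=100):
    """Slice a time-ordered DataFrame into [window, n_features] blocks plus matching targets."""
    X, y = [], []
    features = df[feature_cols].to_numpy()
    targets = df[target_col].to_numpy()
    for start in range(0, len(df) - window + 1, window):
        X.append(features[start:start + window])
        y.append(targets[start:start + window])
    return np.asarray(X), np.asarray(y)

# Hypothetical usage on the processed CSV (the string column cbwd would need
# label-encoding first, as done in Section 2):
# df = pd.read_csv('./PMcsv/bjpm.csv')
# X, y = make_windows(df, ['DEWP', 'TEMP', 'PRES', 'Iws', 'Is', 'Ir'], 'pm2.5', window=100)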

1. Dataset contents

(Figure: a preview of the raw CSV. The UCI Beijing PM2.5 Data Set holds hourly records from 2010 to 2014 with the columns No, year, month, day, hour, pm2.5, DEWP, TEMP, PRES, cbwd, Iws, Is and Ir.)

2. Data processing

Use Pandas to process the dataset. The PM2.5 data is available here:
[Beijing PM2.5 Data Set](https://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data)

def PreData():
    df = pd.read_csv(config.preprocess_file_path)
    # Drop the unneeded 'No' index column
    df.drop('No', axis=1, inplace=True)
    # Drop any row containing NA
    df.dropna(axis=0, how='any', inplace=True)
    # Fill remaining NaN with 0.0 (a no-op after the dropna above, kept as a safety net)
    df.fillna(axis=0, value=0.0, inplace=True)

    # index=False: do not write the row index into the CSV
    df.to_csv(config.process_file_path, index=False)

    new_pd = pd.read_csv(config.process_file_path)
    print(new_pd.info())
    print(new_pd.head(5))
    
Next, prepare the training and test data in batches of 100:
def getTrain_Test():
    df = pd.read_csv(config.process_file_path)
    weather_data = df.iloc[0:40000, 5:12]
    test_data = df.iloc[40000:41700, 5:12]
    weather_pm = df.iloc[0:40000, 4:5]
    test_pm = df.iloc[40000:41700, 4:5]

    print(weather_data)
    print(type(weather_pm))
    print(len(weather_pm))

    i = 0
    # Predict mode: build batches from the test split
    if FLAGS.isPreData_isTrain_isPredict == 2:
        while i < len(test_data):
            x_train = test_data[i:i + config.batch_size].values
            y_label = test_pm[i:i + config.batch_size].values
            # Label-encode the string wind-direction column (cbwd), then scale everything to [0, 1]
            string_encode = sp.LabelEncoder()
            to_1 = sp.MinMaxScaler(feature_range=(0, 1))
            x_train[:, 3] = string_encode.fit_transform(x_train[:, 3])
            x_train = to_1.fit_transform(x_train)
            y_label = to_1.fit_transform(y_label)
            x_train = np.reshape(x_train, [config.batch_size, config.input_train_dim, 1])
            train_x_list.append(x_train.tolist())
            label_y_list.append(y_label.tolist())
            i += config.batch_size

    # Train mode: build batches from the training split
    if FLAGS.isPreData_isTrain_isPredict == 1:
        while i < len(weather_pm):
            x_train = weather_data[i:i + config.batch_size].values
            y_label = weather_pm[i:i + config.batch_size].values
            string_encode = sp.LabelEncoder()
            to_1 = sp.MinMaxScaler(feature_range=(0, 1))
            x_train[:, 3] = string_encode.fit_transform(x_train[:, 3])
            x_train = to_1.fit_transform(x_train)
            y_label = to_1.fit_transform(y_label)
            x_train = np.reshape(x_train, [config.batch_size, config.input_train_dim, 1])
            train_x_list.append(x_train.tolist())
            label_y_list.append(y_label.tolist())
            i += config.batch_size
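
A quick sanity check of what getTrain_Test leaves in the two global lists (a sketch, assuming batch_size = 100 and the 40000-row training split):

getTrain_Test()
print(len(train_x_list))                   # 400 batches (40000 / 100)
print(np.asarray(train_x_list[0]).shape)   # (100, 7, 1): 7 scaled features per hour
print(np.asarray(label_y_list[0]).shape)   # (100, 1): the matching scaled PM2.5 targets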

3. Required imports and configuration

import tensorflow as tf
import pandas as pd
import sklearn.preprocessing as sp
import numpy as np

FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_integer('isPreData_isTrain_isPredict', 1, '0 = PreData, 1 = Train, 2 = Predict')
tf.flags.DEFINE_integer('isAttention', 1, '0 = disable the attention mechanism, 1 = enable it')
tf.flags.DEFINE_integer('is_featureMap', 0, '0 = one attention weight per feature, 1 = one weight per feature map')


class Config():
    preprocess_file_path = './PRSA_data_2010.1.1-2014.12.31.csv'
    process_file_path = './PMcsv/bjpm.csv'
    batch_size = 100
    loop_time = [128, 128]
    input_train_dim = 7
    input_label_dim = 1
    lstm_units = 128


config = Config()
train_x_list = []
label_y_list = []

input_data = tf.placeholder(tf.float32, [1, config.batch_size, config.input_train_dim, 1])
input_label = tf.placeholder(tf.float32, [1, config.batch_size, config.input_label_dim])
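
With the flags defined above, the three modes would typically be chosen on the command line (the script name pm_lstm.py is an assumption):

python pm_lstm.py --isPreData_isTrain_isPredict=0   # preprocess the raw CSV
python pm_lstm.py --isPreData_isTrain_isPredict=1   # train (attention enabled by default)
python pm_lstm.py --isPreData_isTrain_isPredict=2   # restore the checkpoint and evaluate on the test split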

4. The LSTM model

def Bi_lstm():
    global input_data
    # Two stacked bidirectional LSTMs; the second layer consumes the
    # concatenated forward and backward outputs of the first.
    concat_tensor = tf.reshape(input_data, [config.batch_size, config.input_train_dim, 1])

    with tf.variable_scope('lstm1'):
        lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=config.lstm_units)
        # Note: fw and bw wrap the same cell, so the two directions share weights.
        lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.95, output_keep_prob=0.95)
        lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.95, output_keep_prob=0.95)
        # [100,7,1] ===> two tensors of [100,7,128]
        value, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, concat_tensor, dtype=tf.float32)
        # [[100,7,128],[100,7,128]] ===> [100,7,256]
        concat_tensor = tf.concat(value, 2)
        # concat_tensor = tf.layers.batch_normalization(concat_tensor)
    with tf.variable_scope('lstm2'):
        lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=config.lstm_units)
        lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.90, output_keep_prob=0.80)
        lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.90, output_keep_prob=0.80)
        # [100,7,256] ===> two tensors of [100,7,128]
        value2, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, concat_tensor, dtype=tf.float32)
        # [[100,7,128],[100,7,128]] ===> [100,7,256]
        bi_concat_tensor = tf.concat(value2, 2)
        # bi_concat_tensor = tf.layers.batch_normalization(bi_concat_tensor)
        # C-coupling experiment (disabled):
        # c_cell = tf.Variable(tf.truncated_normal(shape=[100, 256, 256]))
        # bta_concat_tensor = tf.nn.softmax(c_cell)
        # ones = tf.ones(shape=[100, 256, 256])
        # bta_bi_concat_tensor = ones - bta_concat_tensor
        # lstm_outvalue = tf.matmul(concat_tensor, bta_concat_tensor) + tf.matmul(bi_concat_tensor, bta_bi_concat_tensor)
    if FLAGS.isAttention == 0:
        bi_value = tf.split(bi_concat_tensor, 2, 2)

        # Sum the forward and backward LSTM outputs: [100,7,128]
        lstm_value = bi_value[0] + bi_value[1]
        new_value = tf.transpose(lstm_value, perm=[1, 0, 2])
        # [7,100,128] ===> take the [100,128] slice at the last of the 7 steps
        gather = tf.gather(new_value, tf.shape(new_value)[0] - 1)

        with tf.variable_scope("full_connect"):
            # [128,1]
            weight = w_init(config.lstm_units, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            # [batch_size,1]
            predict = tf.nn.xw_plus_b(gather, weight, bias)
            return predict
    else:
        bi_value = tf.split(bi_concat_tensor, 2, 2)
        attention = Attention(bi_value[0] + bi_value[1])
        with tf.variable_scope("full_connect1"):
            # [100,128] x [128,256] = [100,256]
            weight = w_init(config.lstm_units, 256)
            bias = b_init(256)
            full_value = tf.nn.xw_plus_b(attention, weight, bias)
        with tf.variable_scope("full_connect2"):
            # [100,256] x [256,1] = [100,1]
            weight = w_init(256, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            # [batch_size,1]
            predict = tf.nn.xw_plus_b(full_value, weight, bias)
            return predict
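
A quick way to sanity-check the graph once the definitions above are in scope (a sketch):

predict = Bi_lstm()
print(predict.shape)  # expected: (100, 1), one scaled PM2.5 value per hour in the batch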

5. The attention mechanism

Two variants are provided here, selected with the is_featureMap flag: 1. attention over whole feature maps;
2. attention over each individual feature value.

def Attention(input):
    tanh_output = tf.tanh(input)
    if FLAGS.is_featureMap == 0:
        # Per-feature attention: one weight for every feature in the batch
        weight = tf.Variable(
            tf.truncated_normal(shape=[config.lstm_units * config.input_train_dim, config.input_train_dim]))
        re_tanh_output = tf.reshape(tanh_output, [config.batch_size, config.lstm_units * config.input_train_dim])

        # [100,7*128] x [128*7,7] = [100,7]
        mul_output = tf.matmul(re_tanh_output, weight)
        # [100,7] attention weights
        alpha = tf.nn.softmax(mul_output)
        # [100,128,7] x [100,7,1] = [100,128,1]
        end_value = tf.matmul(tf.transpose(input, [0, 2, 1]),
                              tf.reshape(alpha, [config.batch_size, config.input_train_dim, 1]))
        s_value = tf.squeeze(end_value)
        tanh_s_val = tf.tanh(s_value)
        end_value = tf.nn.dropout(tanh_s_val, keep_prob=0.9)
        return end_value
    else:
        # Feature-map attention: one weight per feature map
        weight = tf.Variable(tf.truncated_normal(shape=[config.lstm_units, 1]))
        re_tanh_output = tf.reshape(tanh_output, [config.batch_size * config.input_train_dim, config.lstm_units])

        # [100*7,128] x [128,1] = [100*7,1]
        mul_output = tf.matmul(re_tanh_output, weight)
        # Reshape so the softmax runs across the 7 features (without this, a
        # softmax over a [700,1] tensor would make every weight 1.0)
        mul_output = tf.reshape(mul_output, [config.batch_size, config.input_train_dim])
        # [100,7] attention weights
        alpha = tf.nn.softmax(mul_output)
        # [100,128,7] x [100,7,1] = [100,128,1]
        end_value = tf.matmul(tf.transpose(input, [0, 2, 1]),
                              tf.reshape(alpha, [config.batch_size, config.input_train_dim, 1]))
        s_value = tf.squeeze(end_value)
        return tf.tanh(s_value)
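
To make the weighting concrete, here is a minimal NumPy sketch of the per-feature variant (toy shapes batch=2, features=3, units=4; illustrative only, not part of the model code):

import numpy as np

H = np.random.randn(2, 3, 4)                  # LSTM outputs: [batch, features, units]
W = np.random.randn(3 * 4, 3)                 # attention weights: [features*units, features]
scores = np.tanh(H).reshape(2, -1) @ W        # [batch, features]
alpha = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # softmax per sample
context = np.einsum('bfu,bf->bu', H, alpha)   # weighted sum over features: [batch, units]
print(context.shape)                          # (2, 4), one attended vector per sample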
        

6. Training the model

def train_lstm():
    getTrain_Test()
    predict = Bi_lstm()
    loss = tf.reduce_sum(tf.square(tf.reshape(predict, [-1]) - tf.reshape(input_label, [-1])))
    # 'acc' is really the mean absolute error of the scaled predictions, not an accuracy
    acc = tf.reduce_mean(tf.abs(tf.reshape(predict, [-1]) - tf.reshape(input_label, [-1])))
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()

    tf.summary.scalar("loss", loss)
    tf.summary.scalar("acc", acc)
    merge = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(init_op)
        if FLAGS.isPreData_isTrain_isPredict == 1:
            file_write = tf.summary.FileWriter("./Pm_summary/", sess.graph)
            for i in range(20):
                start = 0
                end = 1
                while end < len(train_x_list):
                    feed = {input_data: train_x_list[start:end], input_label: label_y_list[start:end]}
                    sess.run(train_op, feed_dict=feed)
                    loss_val, acc_val, merged = sess.run([loss, acc, merge], feed_dict=feed)
                    print('epoch %d, batch %d, loss => %f, mean abs error => %f' % (i, end, loss_val, acc_val))
                    file_write.add_summary(merged, end)
                    start += 1
                    end += 1

                saver.save(sess, "C:\\Users\\76152\\PycharmProjects\\LSTM\\check\\check")
        if FLAGS.isPreData_isTrain_isPredict == 2:
            saver.restore(sess, "C:\\Users\\76152\\PycharmProjects\\LSTM\\check\\check")
            # In predict mode getTrain_Test filled these lists from the test split
            start = 0
            end = 1
            while end < len(train_x_list):
                feed = {input_data: train_x_list[start:end], input_label: label_y_list[start:end]}
                loss_val, acc_val = sess.run([loss, acc], feed_dict=feed)
                print('batch %d, loss => %f, mean abs error => %f' % (end, loss_val, acc_val))
                start += 1
                end += 1

if __name__ == '__main__':
    # Dispatch on the mode flag (PreData() must run once first to produce the processed CSV)
    if FLAGS.isPreData_isTrain_isPredict == 0:
        PreData()
    else:
        train_lstm()
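
Note that the model's outputs live in the scaled [0, 1] space. Here is a sketch of mapping a predicted batch back to µg/m³, assuming you keep the MinMaxScaler that was fitted on the raw PM2.5 labels (pm_scaler below is that assumed, retained scaler):

# Inside the tf.Session, after restoring the checkpoint:
pred_scaled = sess.run(predict, feed_dict={input_data: train_x_list[0:1]})  # (100, 1), scaled
pred_real = pm_scaler.inverse_transform(pred_scaled)                        # back to µg/m³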

7. The complete code

import tensorflow as tf
import pandas as pd
import sklearn.preprocessing as sp
import numpy as np

FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_integer('isPreData_isTrain_isPredict', 1, '0 = PreData, 1 = Train, 2 = Predict')
tf.flags.DEFINE_integer('isAttention', 1, '0 = disable the attention mechanism, 1 = enable it')
tf.flags.DEFINE_integer('is_featureMap', 0, '0 = one attention weight per feature, 1 = one weight per feature map')


class Config():
    preprocess_file_path = './PRSA_data_2010.1.1-2014.12.31.csv'
    process_file_path = './PMcsv/bjpm.csv'
    batch_size = 100
    loop_time = [128, 128]
    input_train_dim = 7
    input_label_dim = 1
    lstm_units = 128


config = Config()
train_x_list = []
label_y_list = []

input_data = tf.placeholder(tf.float32, [1, config.batch_size, config.input_train_dim, 1])
input_label = tf.placeholder(tf.float32, [1, config.batch_size, config.input_label_dim])


def w_init(shape0: int, shape1: int):
    weight = tf.Variable(tf.random_normal([shape0, shape1]))
    return weight


def b_init(shape: int):
    bias = tf.Variable(tf.constant(0.01, dtype=tf.float32, shape=[shape]))
    return bias


def PreData():
    df = pd.read_csv(config.preprocess_file_path)
    # Drop the unneeded 'No' index column
    df.drop('No', axis=1, inplace=True)
    # Drop any row containing NA
    df.dropna(axis=0, how='any', inplace=True)
    # Fill remaining NaN with 0.0 (a no-op after the dropna above, kept as a safety net)
    df.fillna(axis=0, value=0.0, inplace=True)

    # index=False: do not write the row index into the CSV
    df.to_csv(config.process_file_path, index=False)

    new_pd = pd.read_csv(config.process_file_path)
    print(new_pd.info())
    print(new_pd.head(5))


def getTrain_Test():
    df = pd.read_csv(config.process_file_path)
    weather_data = df.iloc[0:40000, 5:12]
    test_data = df.iloc[40000:41700, 5:12]
    weather_pm = df.iloc[0:40000, 4:5]
    test_pm = df.iloc[40000:41700, 4:5]

    print(weather_data)
    print(type(weather_pm))
    print(len(weather_pm))

    i = 0
    # Predict mode: build batches from the test split
    if FLAGS.isPreData_isTrain_isPredict == 2:
        while i < len(test_data):
            x_train = test_data[i:i + config.batch_size].values
            y_label = test_pm[i:i + config.batch_size].values
            # Label-encode the string wind-direction column (cbwd), then scale everything to [0, 1]
            string_encode = sp.LabelEncoder()
            to_1 = sp.MinMaxScaler(feature_range=(0, 1))
            x_train[:, 3] = string_encode.fit_transform(x_train[:, 3])
            x_train = to_1.fit_transform(x_train)
            y_label = to_1.fit_transform(y_label)
            x_train = np.reshape(x_train, [config.batch_size, config.input_train_dim, 1])
            train_x_list.append(x_train.tolist())
            label_y_list.append(y_label.tolist())
            i += config.batch_size

    # Train mode: build batches from the training split
    if FLAGS.isPreData_isTrain_isPredict == 1:
        while i < len(weather_pm):
            x_train = weather_data[i:i + config.batch_size].values
            y_label = weather_pm[i:i + config.batch_size].values
            string_encode = sp.LabelEncoder()
            to_1 = sp.MinMaxScaler(feature_range=(0, 1))
            x_train[:, 3] = string_encode.fit_transform(x_train[:, 3])
            x_train = to_1.fit_transform(x_train)
            y_label = to_1.fit_transform(y_label)
            x_train = np.reshape(x_train, [config.batch_size, config.input_train_dim, 1])
            train_x_list.append(x_train.tolist())
            label_y_list.append(y_label.tolist())
            i += config.batch_size


def Bi_lstm():
    global input_data
    # Two stacked bidirectional LSTMs; the second layer consumes the
    # concatenated forward and backward outputs of the first.
    concat_tensor = tf.reshape(input_data, [config.batch_size, config.input_train_dim, 1])

    with tf.variable_scope('lstm1'):
        lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=config.lstm_units)
        # Note: fw and bw wrap the same cell, so the two directions share weights.
        lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.95, output_keep_prob=0.95)
        lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.95, output_keep_prob=0.95)
        # [100,7,1] ===> two tensors of [100,7,128]
        value, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, concat_tensor, dtype=tf.float32)
        # [[100,7,128],[100,7,128]] ===> [100,7,256]
        concat_tensor = tf.concat(value, 2)
        # concat_tensor = tf.layers.batch_normalization(concat_tensor)
    with tf.variable_scope('lstm2'):
        lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=config.lstm_units)
        lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.90, output_keep_prob=0.80)
        lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=0.90, output_keep_prob=0.80)
        # [100,7,256] ===> two tensors of [100,7,128]
        value2, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, concat_tensor, dtype=tf.float32)
        # [[100,7,128],[100,7,128]] ===> [100,7,256]
        bi_concat_tensor = tf.concat(value2, 2)
        # bi_concat_tensor = tf.layers.batch_normalization(bi_concat_tensor)
        # C-coupling experiment (disabled):
        # c_cell = tf.Variable(tf.truncated_normal(shape=[100, 256, 256]))
        # bta_concat_tensor = tf.nn.softmax(c_cell)
        # ones = tf.ones(shape=[100, 256, 256])
        # bta_bi_concat_tensor = ones - bta_concat_tensor
        # lstm_outvalue = tf.matmul(concat_tensor, bta_concat_tensor) + tf.matmul(bi_concat_tensor, bta_bi_concat_tensor)
    if FLAGS.isAttention == 0:
        bi_value = tf.split(bi_concat_tensor, 2, 2)

        # Sum the forward and backward LSTM outputs: [100,7,128]
        lstm_value = bi_value[0] + bi_value[1]
        new_value = tf.transpose(lstm_value, perm=[1, 0, 2])
        # [7,100,128] ===> take the [100,128] slice at the last of the 7 steps
        gather = tf.gather(new_value, tf.shape(new_value)[0] - 1)

        with tf.variable_scope("full_connect"):
            # [128,1]
            weight = w_init(config.lstm_units, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            # [batch_size,1]
            predict = tf.nn.xw_plus_b(gather, weight, bias)
            return predict
    else:
        bi_value = tf.split(bi_concat_tensor, 2, 2)
        attention = Attention(bi_value[0] + bi_value[1])
        with tf.variable_scope("full_connect1"):
            # [100,128] x [128,256] = [100,256]
            weight = w_init(config.lstm_units, 256)
            bias = b_init(256)
            full_value = tf.nn.xw_plus_b(attention, weight, bias)
        with tf.variable_scope("full_connect2"):
            # [100,256] x [256,1] = [100,1]
            weight = w_init(256, config.input_label_dim)
            bias = b_init(config.input_label_dim)
            # [batch_size,1]
            predict = tf.nn.xw_plus_b(full_value, weight, bias)
            return predict


def train_lstm():
    getTrain_Test()
    predict = Bi_lstm()
    loss = tf.reduce_sum(tf.square(tf.reshape(predict, [-1]) - tf.reshape(input_label, [-1])))
    # 'acc' is really the mean absolute error of the scaled predictions, not an accuracy
    acc = tf.reduce_mean(tf.abs(tf.reshape(predict, [-1]) - tf.reshape(input_label, [-1])))
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer().minimize(loss)

    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()

    tf.summary.scalar("loss", loss)
    tf.summary.scalar("acc", acc)
    merge = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(init_op)
        if FLAGS.isPreData_isTrain_isPredict == 1:
            file_write = tf.summary.FileWriter("./Pm_summary/", sess.graph)
            for i in range(20):
                start = 0
                end = 1
                while end < len(train_x_list):
                    feed = {input_data: train_x_list[start:end], input_label: label_y_list[start:end]}
                    sess.run(train_op, feed_dict=feed)
                    loss_val, acc_val, merged = sess.run([loss, acc, merge], feed_dict=feed)
                    print('epoch %d, batch %d, loss => %f, mean abs error => %f' % (i, end, loss_val, acc_val))
                    file_write.add_summary(merged, end)
                    start += 1
                    end += 1

                saver.save(sess, "C:\\Users\\76152\\PycharmProjects\\LSTM\\check\\check")
        if FLAGS.isPreData_isTrain_isPredict == 2:
            saver.restore(sess, "C:\\Users\\76152\\PycharmProjects\\LSTM\\check\\check")
            # In predict mode getTrain_Test filled these lists from the test split
            start = 0
            end = 1
            while end < len(train_x_list):
                feed = {input_data: train_x_list[start:end], input_label: label_y_list[start:end]}
                loss_val, acc_val = sess.run([loss, acc], feed_dict=feed)
                print('batch %d, loss => %f, mean abs error => %f' % (end, loss_val, acc_val))
                start += 1
                end += 1


def Attention(input):
    tanh_output = tf.tanh(input)
    if FLAGS.is_featureMap == 0:
        # Per-feature attention: one weight for every feature in the batch
        weight = tf.Variable(
            tf.truncated_normal(shape=[config.lstm_units * config.input_train_dim, config.input_train_dim]))
        re_tanh_output = tf.reshape(tanh_output, [config.batch_size, config.lstm_units * config.input_train_dim])

        # [100,7*128] x [128*7,7] = [100,7]
        mul_output = tf.matmul(re_tanh_output, weight)
        # [100,7] attention weights
        alpha = tf.nn.softmax(mul_output)
        # [100,128,7] x [100,7,1] = [100,128,1]
        end_value = tf.matmul(tf.transpose(input, [0, 2, 1]),
                              tf.reshape(alpha, [config.batch_size, config.input_train_dim, 1]))
        s_value = tf.squeeze(end_value)
        tanh_s_val = tf.tanh(s_value)
        end_value = tf.nn.dropout(tanh_s_val, keep_prob=0.9)
        return end_value
    else:
        # Feature-map attention: one weight per feature map
        weight = tf.Variable(tf.truncated_normal(shape=[config.lstm_units, 1]))
        re_tanh_output = tf.reshape(tanh_output, [config.batch_size * config.input_train_dim, config.lstm_units])

        # [100*7,128] x [128,1] = [100*7,1]
        mul_output = tf.matmul(re_tanh_output, weight)
        # Reshape so the softmax runs across the 7 features (without this, a
        # softmax over a [700,1] tensor would make every weight 1.0)
        mul_output = tf.reshape(mul_output, [config.batch_size, config.input_train_dim])
        # [100,7] attention weights
        alpha = tf.nn.softmax(mul_output)
        # [100,128,7] x [100,7,1] = [100,128,1]
        end_value = tf.matmul(tf.transpose(input, [0, 2, 1]),
                              tf.reshape(alpha, [config.batch_size, config.input_train_dim, 1]))
        s_value = tf.squeeze(end_value)
        return tf.tanh(s_value)


if __name__ == '__main__':
    # Dispatch on the mode flag (PreData() must run once first to produce the processed CSV)
    if FLAGS.isPreData_isTrain_isPredict == 0:
        PreData()
    else:
        train_lstm()

8. Loss and acc

(Figure: TensorBoard curves of the loss and acc summaries recorded during training.)

9. Summary

The results are mediocre; an LSTM is not an ideal fit for this problem. If you manage to improve the prediction accuracy, please let me know!



Source: blog.csdn.net/qq_39426225/article/details/90054893