# 数据预处理
data_csv = data_csv.dropna() # 滤除缺失数据
dataset = data_csv.values # 获得csv的值
dataset = dataset.astype('float32')
max_value = np.max(dataset) # 获得最大值
min_value = np.min(dataset) # 获得最小值
scalar = max_value - min_value # 获得间隔数量
dataset = list(map(lambda x: x / scalar, dataset)) # 归一化
设置数据集
设置X,Y数据集。以look_back=2为准,取第一个和第二个为数组,形成data_X,取第三个作为预测值,形成data_Y,完成训练集的提取。
def create_dataset(dataset, look_back=2):
dataX, dataY = [], []
for i in range(len(dataset) - look_back):
a = dataset[i:(i + look_back)]
dataX.append(a)
dataY.append(dataset[i + look_back])
return np.array(dataX), np.array(dataY)
# 创建好输入输出
data_X, data_Y = create_dataset(dataset)
# 划分训练集和测试集,70% 作为训练集
train_size = int(len(data_X) * 0.7)
test_size = len(data_X) - train_size
train_X = data_X[:train_size]
train_Y = data_Y[:train_size]
test_X = data_X[train_size:]
test_Y = data_Y[train_size:]
From: Pytorch学习之LSTM预测航班