MATLAB code for machine learning--PCA-LSTM model (12)

MATLAB code for machine learning--PCA-LSTM model (12)

Code

1、

%% PCA-LSTM regression script
% Standardizes the predictors, reduces them to d principal components,
% trains an LSTM regression network on the leading num_train samples and
% evaluates it (RMSE, R^2, MAE, plots) on both the training and test split.
%
% NOTE(review): the z-score statistics and the PCA basis are fitted on all
% rows of X, including the rows that later form the test set. Fit them on
% the training rows only if a leakage-free evaluation is required.

%%  Reset the environment
warning off             % suppress warning messages
close all               % close all open figure windows
clear                   % clear workspace variables
clc                     % clear the command window

%%  Import data
X = xlsread('input');   % predictors, one sample per row
Y = xlsread('output');  % targets, one sample per row
if size(X, 1) ~= size(Y, 1)
    error('PCA_LSTM:rowMismatch', ...
        'input has %d rows but output has %d rows.', size(X, 1), size(Y, 1));
end

%% PCA: standardize the predictors
[Z,MU,SIGMA]=zscore(X);

%% Covariance of the standardized data (i.e. the correlation matrix of X)
Sx=cov(Z);

%% Eigen-decomposition of the correlation matrix
[V,D] = eig(Sx);

eigValue = diag(D);                         % eigenvalues as a column vector

[eigValue,IX]=sort(eigValue,'descend');     % largest eigenvalue first
eigVector=V(:,IX);                          % reorder eigenvectors to match

C=eigValue;                                 % already sorted in descending order

% The next three results are intentionally left unsuppressed so that the
% contribution rates are shown in the command window.
rat1=C./sum(C)                              % contribution rate of each component
rat2=cumsum(C)./sum(C)                      % cumulative contribution rate

result1 = cell(numel(C)+1, 3);
result1(1,:)     = {'特征值','贡献率','累计贡献率'};   % header row of the summary table
result1(2:end,1) = num2cell(C);                       % column 1: eigenvalues
result1(2:end,2) = num2cell(rat1);                    % column 2: contribution rate
result1(2:end,3) = num2cell(rat2)                     % column 3: cumulative contribution rate

%% Normalize each eigenvector to unit length
norm_eigVector=sqrt(sum(eigVector.^2, 1));
eigVector=eigVector./repmat(norm_eigVector,size(eigVector,1),1);

%% Choose the number of principal components
% Chosen so that the cumulative contribution rate reaches about 85%;
% adjust d to suit the data at hand.
d=7;
if d > size(eigVector, 2)
    error('PCA_LSTM:tooManyComponents', ...
        'Requested %d principal components but the data has only %d features.', ...
        d, size(eigVector, 2));
end

%% Project onto the leading d components
eigVector=eigVector(:,1:d);
% The eigenvectors were computed from the standardized data Z, so the
% scores must be computed from Z as well (not from the raw X).
Y1=Z*eigVector;

%% The reduced data replaces the original predictors
X=Y1;

%% Train / test split
num_train = 40;         % leading rows used for training; remaining rows form the test set
if size(X, 1) <= num_train
    error('PCA_LSTM:notEnoughSamples', ...
        'Need more than %d samples to build a test set, got %d.', ...
        num_train, size(X, 1));
end

% Predictors (transposed so that each column is one sample)
P_train= X(1:num_train,:)';
P_test = X(num_train+1:end,:)';
M = size(P_train,2);    % number of training samples

% Targets (transposed so that each column is one sample)
T_train= Y(1:num_train,:)';
T_test = Y(num_train+1:end,:)';
N = size(T_test,2);     % number of test samples

%% Min-max normalization to [0, 1], fitted on the training split only
[P_train, ps_input] = mapminmax(P_train, 0, 1);
P_test = mapminmax('apply', P_test, ps_input);

[t_train, ps_output] = mapminmax(T_train, 0, 1);
t_test = mapminmax('apply', T_test, ps_output);

%%  Reshape to d x 1 x 1 x numSamples
P_train =  double(reshape(P_train, d, 1, 1, M));
P_test  =  double(reshape(P_test , d, 1, 1, N));

t_train = t_train';
t_test  = t_test' ;

%%  Convert to cell arrays (one d x 1 sequence per sample)
p_train = cell(M, 1);
for i = 1 : M
    p_train{i, 1} = P_train(:, :, 1, i);
end

p_test = cell(N, 1);
for i = 1 : N
    p_test{i, 1}  = P_test( :, :, 1, i);
end

%%  Build the network
layers = [
    sequenceInputLayer(d)               % input layer: d features per time step

    lstmLayer(10, 'OutputMode', 'last') % LSTM layer, 10 hidden units, last-step output
    reluLayer                           % ReLU activation

    fullyConnectedLayer(1)              % fully connected layer, single output
    regressionLayer];                   % regression output layer

%% Training options
options = trainingOptions('adam', ...     % Adam optimizer
    'MaxEpochs',1000,...                  % at most 1000 epochs
    'InitialLearnRate', 5e-3,...          % initial learning rate 0.005
    'LearnRateSchedule', 'piecewise',...  % piecewise learning-rate decay
    'LearnRateDropFactor', 0.5,...        % decay factor 0.5
    'LearnRateDropPeriod', 900,...        % after 900 epochs the rate becomes 0.005*0.5
    'Shuffle', 'every-epoch',...          % reshuffle the data every epoch
    'Plots', 'training-progress',...      % show the training-progress plot
    'Verbose', false);

%%  Train
net = trainNetwork(p_train, t_train, layers, options);

%%  Predict
t_sim1 = predict(net, p_train);
t_sim2 = predict(net, p_test );

%%  Undo the target normalization
T_sim1 = mapminmax('reverse', t_sim1, ps_output);
T_sim2 = mapminmax('reverse', t_sim2, ps_output);

%%  Root mean squared error
error1 = sqrt(sum((T_sim1' - T_train).^2) ./ M);
error2 = sqrt(sum((T_sim2' - T_test ).^2) ./ N);

%% Coefficient of determination
R1 = 1 - norm(T_train - T_sim1')^2 / norm(T_train - mean(T_train))^2;
R2 = 1 - norm(T_test -  T_sim2')^2 / norm(T_test -  mean(T_test ))^2;

%%  Mean absolute error
mae1 = sum(abs(T_sim1' - T_train)) ./ M ;
mae2 = sum(abs(T_sim2' - T_test )) ./ N ;

%%  Plots
figure
plot(1: M, T_train, 'r-*', 1: M, T_sim1, 'b-o', 'LineWidth', 1)
legend('真实值','预测值')
xlabel('预测样本')
ylabel('预测结果')
% Named title_text rather than "string" to avoid shadowing the built-in string function.
title_text = {'PCA-LSTM训练集预测结果对比'; ['(RMSE=' num2str(error1) 'R^2 =' num2str(R1) 'MAE =' num2str(mae1) ')']};
title(title_text)
xlim([1, M])
grid

figure
plot(1: N, T_test, 'r-*', 1: N, T_sim2, 'b-o', 'LineWidth', 1)
legend('真实值','预测值')
xlabel('预测样本')
ylabel('预测结果')
title_text = {'PCA-LSTM测试集预测结果对比';['(RMSE=' num2str(error2) 'R^2 =' num2str(R2)  'MAE =' num2str(mae2) ')']};
title(title_text)
xlim([1, N])
grid

%% Print the evaluation metrics
disp(['训练集数据的RMSEC为:', num2str(error1)])
disp(['测试集数据的RMSEP为:', num2str(error2)])

disp(['训练集数据的R2为:', num2str(R1)])
disp(['测试集数据的R2为:', num2str(R2)])

disp(['训练集数据的MAE为:', num2str(mae1)])
disp(['测试集数据的MAE为:', num2str(mae2)])

%% Test-set error plot
figure
plot(T_test'-T_sim2,'b-*', 'LineWidth', 1)
xlabel('测试集样本编号')
ylabel('预测误差')
title('PCA-LSTM测试集预测误差')
grid on;
legend('PCA-LSTM预测输出误差')

    

2、

function [z,mu,sigma] = zscore(x,flag,dim)
%ZSCORE Standardized z score for tall arrays
%   Z = ZSCORE(X)
%   [Z,MU,SIGMA] = ZSCORE(X)
%   [...] = ZSCORE(X,1)
%   [...] = ZSCORE(X,FLAG,DIM)
%
%   MU and SIGMA are the mean and standard deviation used to standardize
%   X. FLAG is forwarded to STD and defaults to 0. Wherever SIGMA is zero
%   the division uses 1 instead, while the returned SIGMA keeps the zero.
%
%   See also ZSCORE, MEAN, STD.

%   Copyright 2015 The MathWorks, Inc.

% Fall back to the default flag when the caller did not pass one.
if nargin < 2
    flag = 0;
end

% The flag is checked before any statistic is computed.
validateFlag(flag)

if nargin >= 3
    % Caller chose the dimension explicitly.
    mu = mean(x,dim);
    sigma = std(x,flag,dim);
else
    % No dimension given: the size of a tall array might not be known,
    % so rely on the dimensionless forms of MEAN and STD.
    mu = mean(x);
    sigma = std(x,flag);
end

% Build a divisor in which every zero standard deviation is replaced by
% one. SUBSASGN is called in function form with an explicit subscript
% structure, as in the original implementation.
zeroSpread = substruct('()', {sigma==0});
safeSigma = subsasgn(sigma, zeroSpread, 1);

z = (x - mu) ./ safeSigma;


function validateFlag(flag)
%VALIDATEFLAG Check that FLAG is a numeric, integer, scalar, binary value.
%   Delegates to VALIDATEATTRIBUTES, which raises an error when FLAG does
%   not satisfy the listed attributes. The trailing 2 is passed as the
%   argument index, i.e. FLAG's position in the caller's input list.
acceptedClasses = {'numeric'};
requiredTraits  = {'integer','scalar','binary'};
argPosition     = 2;
validateattributes(flag, acceptedClasses, requiredTraits, argPosition);

3、

% Coefficient of determination (squared Pearson correlation)
function R2=eva1(T_train,T_sim1)
%EVA1 Squared Pearson correlation between observed and predicted values.
%   R2 = EVA1(T_train, T_sim1) returns the square of the sample
%   correlation coefficient between the observed values T_train and the
%   predictions T_sim1.
%
%   Both inputs are flattened to column vectors first, so a row vector may
%   be paired with a column vector. Without this, the element-wise product
%   of a 1-by-N and an N-by-1 input would expand to an N-by-N matrix.
%
%   An error is raised when the inputs do not hold the same number of
%   elements. If either input has zero variance the result is NaN (0/0).

observed  = T_train(:);
predicted = T_sim1(:);

N = numel(observed);
if numel(predicted) ~= N
    error('eva1:sizeMismatch', ...
        'Inputs must have the same number of elements (got %d and %d).', ...
        N, numel(predicted));
end

sumObs   = sum(observed);
sumPred  = sum(predicted);
crossTerm = N*sum(predicted.*observed) - sumPred*sumObs;   % N^2 * covariance
spreadPred = N*sum(predicted.^2) - sumPred^2;              % N^2 * variance of predictions
spreadObs  = N*sum(observed.^2)  - sumObs^2;               % N^2 * variance of observations

R2 = crossTerm^2 / (spreadPred*spreadObs);

4、

% Coefficient of determination (squared Pearson correlation)
function R2=eva1(T_test,T_sim2)
%EVA1 Squared Pearson correlation between observed and predicted values.
%   R2 = EVA1(T_test, T_sim2) returns the square of the sample correlation
%   coefficient between the observed values T_test and the predictions
%   T_sim2.
%
%   Both inputs are flattened to column vectors first, so a row vector may
%   be paired with a column vector. Without this, the element-wise product
%   of a 1-by-N and an N-by-1 input would expand to an N-by-N matrix.
%
%   An error is raised when the inputs do not hold the same number of
%   elements. If either input has zero variance the result is NaN (0/0).

observed  = T_test(:);
predicted = T_sim2(:);

N = numel(observed);
if numel(predicted) ~= N
    error('eva1:sizeMismatch', ...
        'Inputs must have the same number of elements (got %d and %d).', ...
        N, numel(predicted));
end

sumObs   = sum(observed);
sumPred  = sum(predicted);
crossTerm = N*sum(predicted.*observed) - sumPred*sumObs;   % N^2 * covariance
spreadPred = N*sum(predicted.^2) - sumPred^2;              % N^2 * variance of predictions
spreadObs  = N*sum(observed.^2)  - sumObs^2;               % N^2 * variance of observations

R2 = crossTerm^2 / (spreadPred*spreadObs);

Data

insert image description here
insert image description here

Results

insert image description here
insert image description here
insert image description here
insert image description here
If you need the code and data, please leave your email address in the comment section; I usually reply within one day. Please like and follow — thank you!

Guess you like

Origin blog.csdn.net/weixin_44312889/article/details/128106417