SVM-Based Fuzzy Information Granulation Time-Series Regression Forecasting: Predicting the Change Trend and Change Range of the SSE Composite Opening Index


Main script: chapter_FIGsh.m

%% Clear the environment
function chapter_FIGsh
tic;
close all;
clear;
clc;
format compact;
%% Extract the raw data

% Load the test data: SSE Composite Index (1990.12.19 - 2009.08.19)
% The data is a 4579-by-6 double matrix; each row is one trading day.
% The 6 columns are the day's opening index, high, low, closing index, trading volume and turnover.
load chapter_sh.mat;

% Extract the opening-index series
ts = sh_open;
time = length(ts);
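% Note: ts holds the daily opening values from sh_open; FIG_D (defined at the end
% of this file) works on a 1-by-N row vector, which is why ts is transposed (ts')
% when it is granulated below.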

% Plot the daily opening index of the SSE Composite
figure;
plot(ts,'LineWidth',2);
title('SSE Composite daily opening index (1990.12.20-2009.08.19)','FontSize',12);
xlabel('Trading day (1990.12.19-2009.08.19)','FontSize',12);
ylabel('Opening index','FontSize',12);
grid on;
% print -dtiff -r600 original;

snapnow;

%% Fuzzy information granulation of the raw data

win_num = floor(time/5);
tsx = 1:win_num;
tsx = tsx';
[Low,R,Up]=FIG_D(ts','triangle',win_num);
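% Note: win_num = floor(time/5), so each granulation window covers 5 trading days
% (roughly one trading week). For the 'triangle' membership function, FIG_D (defined
% at the end of this file) returns for each window, with X the sorted window data:
%   R   = median(X)
%   Low = 2*mean(X(1:floor(end/2))) - R
%   Up  = 2*mean(X(ceil(end/2):end)) - R
% A small hand-checked example (commented out, illustrative values only):
%   [l1,r1,u1] = FIG_D([5 1 3 2 4],'triangle',1);   % r1 = 3, l1 = 2*1.5-3 = 0, u1 = 2*4-3 = 5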

% Visualize the fuzzy information granules
figure;
hold on;
plot(Low,'b+');
plot(R,'r*');
plot(Up,'gx');
hold off;
legend('Low','R','Up','Location','NorthWest');
title('Fuzzy information granulation','FontSize',12);
xlabel('Granulation window index','FontSize',12);
ylabel('Granule value','FontSize',12);
grid on;
% print -dtiff -r600 FIGpic;

snapnow;
%% SVM regression forecasting of Low

% Preprocessing: normalize Low
% mapminmax is MATLAB's built-in min-max mapping function
[low,low_ps] = mapminmax(Low);
low_ps.ymin = 100;
low_ps.ymax = 500;
% Normalize Low with the adjusted settings
[low,low_ps] = mapminmax(Low,low_ps);
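% Note: the first mapminmax call above only fits the mapping and returns the
% settings struct low_ps; ymin/ymax are then overridden to 100/500, so the second
% call rescales Low into the range [100, 500] instead of the default [-1, 1].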
% Plot the normalized Low
figure;
plot(low,'b+');
title('Normalized Low','FontSize',12);
xlabel('Granulation window index','FontSize',12);
ylabel('Normalized granule value','FontSize',12);
grid on;
% print -dtiff -r600 lowscale;
% Transpose low to match the data format required by the libsvm toolbox
low = low';
snapnow;

% Select the best SVM parameters c & g for regression
% Coarse grid search first
[bestmse,bestc,bestg] = SVMcgForRegress(low,tsx,-10,10,-10,10,3,1,1,0.1,1);

% Display the coarse search results
disp('Coarse parameter search results:');
str = sprintf( 'SVM parameters for Low: Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);

% Fine grid search based on the coarse selection result
[bestmse,bestc,bestg] = SVMcgForRegress(low,tsx,-4,8,-10,10,3,0.5,0.5,0.05,1);

% Display the fine search results
disp('Fine parameter search results:');
str = sprintf( 'SVM parameters for Low: Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);

% Train the SVM
cmd = ['-c ', num2str(bestc), ' -g ', num2str(bestg) , ' -s 3 -p 0.1'];
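% Note: in the libsvm option string, -s 3 selects epsilon-SVR, -p 0.1 is the
% epsilon of the loss function, -c is the penalty parameter and -g the RBF kernel
% parameter gamma (kernel type -t defaults to 2, i.e. RBF).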
low_model = svmtrain(low, tsx, cmd);

% Predict
[low_predict,low_mse] = svmpredict(low,tsx,low_model);
low_predict = mapminmax('reverse',low_predict,low_ps);
predict_low = svmpredict(1,win_num+1,low_model);
predict_low = mapminmax('reverse',predict_low,low_ps);
predict_low
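% Note: svmpredict(1, win_num+1, low_model) forecasts the next, not yet observed,
% window by feeding its index win_num+1 as the input feature; the leading 1 is only
% a placeholder label (the true value is unknown), so the MSE libsvm reports for
% this call is meaningless. The prediction is mapped back to the original scale
% with mapminmax('reverse',...).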

%% Analysis of the regression results for Low
figure;
hold on;
plot(Low,'b+');
plot(low_predict,'r*');
legend('original low','predict low','Location','NorthWest');
title('Original vs. predicted Low','FontSize',12);
xlabel('Granulation window index','FontSize',12);
ylabel('Granule value','FontSize',12);
grid on;
% print -dtiff -r600 lowresult;

figure;
error = low_predict - Low';
plot(error,'ro');
title('Error (predicted - original)','FontSize',12);
xlabel('Granulation window index','FontSize',12);
ylabel('Error','FontSize',12);
grid on;
% print -dtiff -r600 lowresulterror;
% snapnow;

%% SVM regression forecasting of R

% Preprocessing: normalize R
% mapminmax is MATLAB's built-in min-max mapping function
[r,r_ps] = mapminmax(R);
r_ps.ymin = 100;
r_ps.ymax = 500;
% Normalize R with the adjusted settings
[r,r_ps] = mapminmax(R,r_ps);
% Plot the normalized R
figure;
plot(r,'r*');
title('Normalized R','FontSize',12);
grid on;
% Transpose r to match the data format required by the libsvm toolbox
r = r';
% snapnow;

% Select the best SVM parameters c & g for regression
% Coarse grid search first
[bestmse,bestc,bestg] = SVMcgForRegress(r,tsx,-10,10,-10,10,3,1,1,0.1);

% Display the coarse search results
disp('Coarse parameter search results:');
str = sprintf( 'SVM parameters for R: Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);

% Fine grid search based on the coarse selection result
[bestmse,bestc,bestg] = SVMcgForRegress(r,tsx,-4,8,-10,10,3,0.5,0.5,0.05);

% Display the fine search results
disp('Fine parameter search results:');
str = sprintf( 'SVM parameters for R: Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);

% Train the SVM
cmd = ['-c ', num2str(bestc), ' -g ', num2str(bestg) , ' -s 3 -p 0.1'];
r_model = svmtrain(r, tsx, cmd);

% Predict
[r_predict,r_mse] = svmpredict(r,tsx,r_model);
r_predict = mapminmax('reverse',r_predict,r_ps);
predict_r = svmpredict(1,win_num+1,r_model);
predict_r = mapminmax('reverse',predict_r,r_ps);
predict_r

%% Analysis of the regression results for R
figure;
hold on;
plot(R,'b+');
plot(r_predict,'r*');
legend('original r','predict r','Location','NorthWest');
title('Original vs. predicted R','FontSize',12);
grid on;
figure;
error = r_predict - R';
plot(error,'ro');
title('Error (predicted - original)','FontSize',12);
grid on;
% snapnow;

%% SVM regression forecasting of Up

% Preprocessing: normalize Up
% mapminmax is MATLAB's built-in min-max mapping function
[up,up_ps] = mapminmax(Up);
up_ps.ymin = 100;
up_ps.ymax = 500;
% Normalize Up with the adjusted settings
[up,up_ps] = mapminmax(Up,up_ps);
% Plot the normalized Up
figure;
plot(up,'gx');
title('Normalized Up','FontSize',12);
grid on;
% Transpose up to match the data format required by the libsvm toolbox
up = up';
snapnow;

% Select the best SVM parameters c & g for regression
% Coarse grid search first
[bestmse,bestc,bestg] = SVMcgForRegress(up,tsx,-10,10,-10,10,3,1,1,0.5);

% Display the coarse search results
disp('Coarse parameter search results:');
str = sprintf( 'SVM parameters for Up: Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);

% Fine grid search based on the coarse selection result
[bestmse,bestc,bestg] = SVMcgForRegress(up,tsx,-4,8,-10,10,3,0.5,0.5,0.2);

% Display the fine search results
disp('Fine parameter search results:');
str = sprintf( 'SVM parameters for Up: Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);

% Train the SVM
cmd = ['-c ', num2str(bestc), ' -g ', num2str(bestg) , ' -s 3 -p 0.1'];
up_model = svmtrain(up, tsx, cmd);

% Predict
[up_predict,up_mse] = svmpredict(up,tsx,up_model);
up_predict = mapminmax('reverse',up_predict,up_ps);
predict_up = svmpredict(1,win_num+1,up_model);
predict_up = mapminmax('reverse',predict_up,up_ps);
predict_up

%% Analysis of the regression results for Up
figure;
hold on;
plot(Up,'b+');
plot(up_predict,'r*');
legend('original up','predict up','Location','NorthWest');
title('Original vs. predicted Up','FontSize',12);
grid on;
figure;
error = up_predict - Up';
plot(error,'ro');
title('Error (predicted - original)','FontSize',12);
grid on;
toc;
% snapnow;

%% Subfunction SVMcgForRegress.m
function [mse,bestc,bestg] = SVMcgForRegress(train_label,train,cmin,cmax,gmin,gmax,v,cstep,gstep,msestep,flag)
% SVMcgForRegress
% Inputs:
% train_label: training targets, in the format required by the libsvm toolbox.
% train: training data, in the format required by the libsvm toolbox.
% cmin: minimum of the search range for the penalty parameter c (base-2 log), i.e. c_min = 2^(cmin). Default -5.
% cmax: maximum of the search range for the penalty parameter c (base-2 log), i.e. c_max = 2^(cmax). Default 5.
% gmin: minimum of the search range for the kernel parameter g (base-2 log), i.e. g_min = 2^(gmin). Default -5.
% gmax: maximum of the search range for the kernel parameter g (base-2 log), i.e. g_max = 2^(gmax). Default 5.
% v: number of folds used for cross validation. Default 3.
% cstep: step size for c on the base-2 log grid. Default 1.
% gstep: step size for g on the base-2 log grid. Default 1.
% msestep: contour-level step used when plotting the MSE surface. Default 0.1.
% flag: if 1, save the parameter-selection figure to a .tif file (the print call is commented out below). Default 0.
% Outputs:
% mse: lowest cross-validation MSE found on the grid
% bestc: best parameter c
% bestg: best parameter g
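% Usage example (as called in the main script above):
%   [bestmse,bestc,bestg] = SVMcgForRegress(low,tsx,-10,10,-10,10,3,1,1,0.1,1);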

% about the parameters of SVMcgForRegress
if nargin < 11
    flag = 0;
end
if nargin < 10
    msestep = 0.1;
end
if nargin < 7
    msestep = 0.1;
    v = 3;
    cstep = 1;
    gstep = 1;
end
if nargin < 6
    msestep = 0.1;
    v = 3;
    cstep = 1;
    gstep = 1;
    gmax = 5;
end
if nargin < 5
    msestep = 0.1;
    v = 3;
    cstep = 1;
    gstep = 1;
    gmax = 5;
    gmin = -5;
end
if nargin < 4
    msestep = 0.1;
    v = 3;
    cstep = 1;
    gstep = 1;
    gmax = 5;
    gmin = -5;
    cmax = 5;
end
if nargin < 3
    msestep = 0.1;
    v = 3;
    cstep = 1;
    gstep = 1;
    gmax = 5;
    gmin = -5;
    cmax = 5;
    cmin = -5;
end
% X: log2(c) grid, Y: log2(g) grid, cg: cross-validation MSE
[X,Y] = meshgrid(cmin:cstep:cmax,gmin:gstep:gmax);
[m,n] = size(X);
cg = zeros(m,n);
% record the cross-validation MSE for each (c,g) pair and keep the smallest MSE (ties broken by the smaller c)
bestc = 0;
bestg = 0;
mse = 10^10;
basenum = 2;
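% Note: when svmtrain is called with the -v option, libsvm runs v-fold cross
% validation and returns a single number instead of a model; for -s 3 (epsilon-SVR)
% that number is the cross-validation mean squared error, which is stored in
% cg(i,j) and minimized by the search below.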
for i = 1:m
    for j = 1:n
        cmd = ['-v ',num2str(v),' -c ',num2str( basenum^X(i,j) ),' -g ',num2str( basenum^Y(i,j) ),' -s 3'];
        cg(i,j) = svmtrain(train_label, train, cmd);
        
        if cg(i,j) < mse
            mse = cg(i,j);
            bestc = basenum^X(i,j);
            bestg = basenum^Y(i,j);
        end
        if ( cg(i,j) == mse && bestc > basenum^X(i,j) )
            mse = cg(i,j);
            bestc = basenum^X(i,j);
            bestg = basenum^Y(i,j);
        end
        
    end
end

% plot the cross-validation MSE over the (c,g) grid
[cg,ps] = mapminmax(cg,0,1);
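% Note: the MSE grid is rescaled to [0,1] here purely for display, so the contour
% levels 0:msestep:0.5 and the z-axis limits below refer to the normalized MSE,
% not the raw cross-validation MSE.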
figure;
subplot(1,2,1);
[C,h] = contour(X,Y,cg,0:msestep:0.5);
clabel(C,h,'FontSize',10,'Color','r');
xlabel('log2c','FontSize',12);
ylabel('log2g','FontSize',12);
title('Parameter selection result (contour plot)','FontSize',12);
grid on;

subplot(1,2,2);
meshc(X,Y,cg);
% mesh(X,Y,cg);
% surf(X,Y,cg);
axis([cmin,cmax,gmin,gmax,0,1]);
xlabel('log2c','FontSize',12);
ylabel('log2g','FontSize',12);
zlabel('MSE','FontSize',12);
title('Parameter selection result (3D view)','FontSize',12);

filename = ['c',num2str(bestc),'g',num2str(bestg),num2str(msestep),'.tif'];
% if flag == 1;
%     print('-dtiff','-r600',filename);
% end

%% Subfunction FIG_D.m

function [low,R,up]=FIG_D(XX,MFkind,win_num)
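% FIG_D  Fuzzy information granulation of a data series.
% Inputs:
%   XX      : 1-by-N row vector of raw data
%   MFkind  : membership function type, one of 'trapezoid', 'triangle',
%             'asygauss', 'asyparabola' (default 'trapezoid')
%   win_num : number of granulation windows (default 10)
% Outputs:
%   low, R, up : per-window granule parameters (lower bound, core, upper bound);
%                for 'trapezoid' R has two rows [m; n], otherwise one row per window.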
%%

if nargin < 3
    win_num = 10;
end
if nargin < 2
    MFkind = 'trapezoid';
end

[d1,d2] = size(XX);
X = sort(XX);
switch MFkind 
% trapezoid  
    case('trapezoid')       
        if win_num == 1
            if mod(d2,2) ~= 0
                m = X( (d2+1)/2 );
                n = X( (d2+1)/2 );
                mflag = (d2+1)/2;
                nflag = (d2+1)/2;
            else
                m = X( d2/2 );
                n = X( (d2+2)/2 );
                mflag = d2/2;
                nflag = (d2+2)/2;
            end
            
            R(1,1) = m;
            R(2,1) = n;
            
            k1 = mflag;
            k2 = d2 - nflag+1;
            c1 = ( sum(X(1:k1)) )/k1;
            c2 = ( sum(X(nflag:d2)) )/k2;
            
            low = 2*c1 - m;
            up = 2*c2 - n;
            
        else
            low = [];
            R = [];
            up = [];
            k = floor(d2/win_num);
            for i = 1:(win_num-1)
                [l,r,u]=FIG_D(XX( (1+(i-1)*k):(k+(i-1)*k) ),MFkind,1);
                low = [low,l];
                R = [R,r];
                up = [up,u];
            end
            [l,r,u] = FIG_D(XX( (1+(win_num-1)*k):d2 ),MFkind,1);
            low =[low,l];
            R = [R,r];
            up = [up,u];
        end
%% triangle
    case('triangle')
        if win_num == 1
            
            R = median(X);
            m = median(X);
            n = median(X);
            
            mflag = floor(d2/2);
            nflag = ceil(d2/2);
            k1 = mflag;
            k2 = d2-nflag+1;
            c1 = ( sum(X(1:k1)) )/k1;
            c2 = ( sum(X(nflag:d2)) )/k2;
            
            low = 2*c1 - m;
            up = 2*c2 - n;
            
        else
            low = [];
            R = [];
            up = [];
            k = floor(d2/win_num);
            for i = 1:(win_num-1)
                [l,r,u]=FIG_D(XX( (1+(i-1)*k):(k+(i-1)*k) ),MFkind,1);
                low = [low,l];
                R = [R,r];
                up = [up,u];
            end
            [l,r,u] = FIG_D(XX( (1+(win_num-1)*k):d2 ),MFkind,1);
            low =[low,l];
            R = [R,r];
            up = [up,u];
        end
%% asygauss       
    case('asygauss')  % same as the Pedrycz-based version, because the Gaussian membership function cannot be modified
        if win_num == 1
            R = median(X);
            m = median(X);
            n = median(X);
            
            mflag = floor(d2/2);
            nflag = ceil(d2/2);
            
            a_final = 0;
            Qa_final = 0;
            for index = 1:( mflag-1 )
                a = X(index);
                Qa=0;              
                x = X( 1:(mflag-1) );
                y = (x<=m).*(exp(-(x-m).^2/a^2) );
                Qa = sum(y);
                Qa = Qa/(m-a);
                    if Qa>=Qa_final
                        Qa_final = Qa;
                        a_final = a;
                    end                              
            end
            
            low = a_final;
            
            b_final = 0;
            Qb_final = 0;
            for index = ( nflag+1 ):d2
                b = X(index);
                Qb = 0;                
                x = X( (nflag+1):d2 );
                y = (x>=m).*(exp(-(x-m).^2/b^2) );   
                Qb = sum(y);
                Qb = Qb/(b-n);
                    if Qb>=Qb_final
                        Qb_final = Qb;
                        b_final = b;
                    end
            end
            
            up = b_final;
            
        else
            low = [];
            R = [];
            up = [];
            k = floor(d2/win_num);
            for i = 1:(win_num-1)
                [l,r,u]=FIG_D(XX( (1+(i-1)*k):(k+(i-1)*k) ),MFkind,1);
                low = [low,l];
                R = [R,r];
                up = [up,u];
            end
            [l,r,u] = FIG_D(XX( (1+(win_num-1)*k):d2 ),MFkind,1);
            low =[low,l];
            R = [R,r];
            up = [up,u];
        end               
%% asyparabola       
    case('asyparabola')   
        if win_num == 1
            R = median(X);
            m = median(X);
            n = median(X);
            
            mflag = floor(d2/2);
            nflag = ceil(d2/2);

            a_final = 0;
            Qa_final = 0;
            for index = 1:( mflag-1 )
                a = X(index);
                Qa=0;             
                x = X( 1:( mflag-1) );          
                y=(x<=m).*(1-(m-x).^2/(m-a)^2);               
                Qa = sum(y);
                Qa = Qa/(m-a);
                    if Qa>=Qa_final
                        Qa_final = Qa;
                        a_final = a;
                    end                                
            end
            
            low = a_final;
            
            b_final = 0;
            Qb_final = 0;
            for index = ( nflag+1 ):d2
                b = X(index);
                Qb = 0;             
                x = X( (nflag+1):d2 );
                y=(x>=m).*(1-(m-x).^2/(m-b)^2);
                Qb = sum(y);
                Qb = Qb/(b-n);
                    if Qb>=Qb_final
                        Qb_final = Qb;
                        b_final = b;
                    end   
            end
            
            up = b_final;
            
       else
            low = [];
            R = [];
            up = [];
            k = floor(d2/win_num);
            for i = 1:(win_num-1)
                [l,r,u]=FIG_D(XX( (1+(i-1)*k):(k+(i-1)*k) ),MFkind,1);
                low = [low,l];
                R = [R,r];
                up = [up,u];
            end
            [l,r,u] = FIG_D(XX( (1+(win_num-1)*k):d2 ),MFkind,1);
            low =[low,l];
            R = [R,r];
            up = [up,u];
        end                
end

If you need the data, please send me a private message. Thank you.


Reposted from blog.csdn.net/Allen1862105/article/details/129385793