% 基于SVM的信息粒化时序回归预测——上证指数开盘指数变化趋势和变化空间预测
% FIGsh
%% 清空环境变量
function chapter_FIGsh
% chapter_FIGsh  Fuzzy-granulate an index series and regress each granule
% bound with an SVM.
%
% Steps performed, in order:
%   1. Load chapter_sh.mat and take the variable sh_open as the series.
%   2. Split the series into floor(length/5) windows and granulate it with
%      FIG_D(...,'triangle',...) into the three sequences Low, R and Up.
%   3. For each of Low, R and Up: rescale to [100,500] with mapminmax,
%      grid-search c and g with SVMcgForRegress (coarse, then fine), train
%      an SVR with svmtrain ('-s 3 -p 0.1'), predict the training windows
%      and the next window (win_num+1), and plot fit and error.
%
% Takes no inputs and returns nothing; results are printed and plotted.
% svmtrain/svmpredict are not defined in this file -- presumably the libsvm
% MATLAB interface; it must be on the path.
tic;
close all;
clear;
clc;
format compact;
%% Extract the raw data
% Load the Shanghai Composite Index test data (1990.12.19-2009.08.19).
% The data is a 4579*6 double matrix, one row per trading day.
% The 6 columns are the day's opening index, highest value, lowest value,
% closing index, trading volume and turnover.
load chapter_sh.mat;
% Extract the opening-index series
ts = sh_open;
time = length(ts);
% Plot the raw daily opening index
figure;
plot(ts,'LineWidth',2);
title('上证指数的每日开盘数(1990.12.20-2009.08.19)','FontSize',12);
xlabel('交易日天数(1990.12.19-2009.08.19)','FontSize',12);
ylabel('开盘数','FontSize',12);
grid on;
% print -dtiff -r600 original;
snapnow;
%% Fuzzy information granulation of the raw data
win_num = floor(time/5);
tsx = 1:win_num;
tsx = tsx';
[Low,R,Up]=FIG_D(ts','triangle',win_num);
% Visualise the granulation result
figure;
hold on;
plot(Low,'b+');
plot(R,'r*');
plot(Up,'gx');
hold off;
% 'NorthWest' is the named equivalent of the legacy numeric position code 2
legend('Low','R','Up','Location','NorthWest');
title('模糊信息粒化可视化图','FontSize',12);
xlabel('粒化窗口数目','FontSize',12);
ylabel('粒化值','FontSize',12);
grid on;
% print -dtiff -r600 FIGpic;
snapnow;
%% SVM regression on Low
% Pre-processing: normalise Low.
% The first mapminmax call only obtains a settings struct; its range is then
% overridden to [100,500] and the mapping is re-applied.
[low,low_ps] = mapminmax(Low);
low_ps.ymin = 100;
low_ps.ymax = 500;
% Normalise Low with the adjusted range
[low,low_ps] = mapminmax(Low,low_ps);
% Plot the normalised Low
figure;
plot(low,'b+');
title('Low归一化后的图像','FontSize',12);
xlabel('粒化窗口数目','FontSize',12);
ylabel('归一化后的粒化值','FontSize',12);
grid on;
% print -dtiff -r600 lowscale;
% Transpose to a column, the layout the libsvm toolbox expects
low = low';
snapnow;
% Select the best SVM parameters c & g for the regression
% Coarse search first
[bestmse,bestc,bestg] = SVMcgForRegress(low,tsx,-10,10,-10,10,3,1,1,0.1,1);
% Print the coarse search result
disp('打印粗略选择结果');
str = sprintf( 'SVM parameters for Low:Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);
% Fine search, range chosen from the coarse result plot
[bestmse,bestc,bestg] = SVMcgForRegress(low,tsx,-4,8,-10,10,3,0.5,0.5,0.05,1);
% Print the fine search result
disp('打印精细选择结果');
str = sprintf( 'SVM parameters for Low:Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);
% Train the SVM
cmd = ['-c ', num2str(bestc), ' -g ', num2str(bestg) , ' -s 3 -p 0.1'];
low_model = svmtrain(low, tsx, cmd);
% Predict the training windows, then map back to the original scale
[low_predict,low_mse] = svmpredict(low,tsx,low_model);
low_predict = mapminmax('reverse',low_predict,low_ps);
% Predict the next window; the label argument 1 is only a placeholder
predict_low = svmpredict(1,win_num+1,low_model);
predict_low = mapminmax('reverse',predict_low,low_ps);
predict_low
%% Analysis of the Low regression result
figure;
hold on;
plot(Low,'b+');
plot(low_predict,'r*');
hold off;
legend('original low','predict low','Location','NorthWest');
title('original vs predict','FontSize',12);
xlabel('粒化窗口数目','FontSize',12);
ylabel('粒化值','FontSize',12);
grid on;
% print -dtiff -r600 lowresult;
figure;
% Named low_err rather than "error" so the built-in error() is not shadowed
low_err = low_predict - Low';
plot(low_err,'ro');
title('误差(predicted data-original data)','FontSize',12);
xlabel('粒化窗口数目','FontSize',12);
ylabel('误差量','FontSize',12);
grid on;
% print -dtiff -r600 lowresulterror;
% snapnow;
%% SVM regression on R
% Pre-processing: normalise R (same two-step mapminmax as for Low)
[r,r_ps] = mapminmax(R);
r_ps.ymin = 100;
r_ps.ymax = 500;
% Normalise R with the adjusted range
[r,r_ps] = mapminmax(R,r_ps);
% Plot the normalised R
figure;
plot(r,'r*');
title('r归一化后的图像','FontSize',12);
grid on;
% Transpose to a column, the layout the libsvm toolbox expects
r = r';
% snapnow;
% Select the best SVM parameters c & g for the regression
% Coarse search first
[bestmse,bestc,bestg] = SVMcgForRegress(r,tsx,-10,10,-10,10,3,1,1,0.1);
% Print the coarse search result
disp('打印粗略选择结果');
str = sprintf( 'SVM parameters for R:Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);
% Fine search, range chosen from the coarse result plot
[bestmse,bestc,bestg] = SVMcgForRegress(r,tsx,-4,8,-10,10,3,0.5,0.5,0.05);
% Print the fine search result
disp('打印精细选择结果');
str = sprintf( 'SVM parameters for R:Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);
% Train the SVM
cmd = ['-c ', num2str(bestc), ' -g ', num2str(bestg) , ' -s 3 -p 0.1'];
r_model = svmtrain(r, tsx, cmd);
% Predict with the model trained on R (previously low_model was used here,
% so the R fit and error plots were produced by the wrong model)
[r_predict,r_mse] = svmpredict(r,tsx,r_model);
r_predict = mapminmax('reverse',r_predict,r_ps);
predict_r = svmpredict(1,win_num+1,r_model);
predict_r = mapminmax('reverse',predict_r,r_ps);
predict_r
%% Analysis of the R regression result
figure;
hold on;
plot(R,'b+');
plot(r_predict,'r*');
hold off;
legend('original r','predict r','Location','NorthWest');
title('original vs predict','FontSize',12);
grid on;
figure;
r_err = r_predict - R';
plot(r_err,'ro');
title('误差(predicted data-original data)','FontSize',12);
grid on;
% snapnow;
%% SVM regression on Up
% Pre-processing: normalise Up (same two-step mapminmax as for Low)
[up,up_ps] = mapminmax(Up);
up_ps.ymin = 100;
up_ps.ymax = 500;
% Normalise Up with the adjusted range
[up,up_ps] = mapminmax(Up,up_ps);
% Plot the normalised Up
figure;
plot(up,'gx');
title('Up归一化后的图像','FontSize',12);
grid on;
% Transpose to a column, the layout the libsvm toolbox expects
up = up';
snapnow;
% Select the best SVM parameters c & g for the regression
% Coarse search first
[bestmse,bestc,bestg] = SVMcgForRegress(up,tsx,-10,10,-10,10,3,1,1,0.5);
% Print the coarse search result
disp('打印粗略选择结果');
str = sprintf( 'SVM parameters for Up:Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);
% Fine search, range chosen from the coarse result plot
[bestmse,bestc,bestg] = SVMcgForRegress(up,tsx,-4,8,-10,10,3,0.5,0.5,0.2);
% Print the fine search result
disp('打印精细选择结果');
str = sprintf( 'SVM parameters for Up:Best Cross Validation MSE = %g Best c = %g Best g = %g',bestmse,bestc,bestg);
disp(str);
% Train the SVM
cmd = ['-c ', num2str(bestc), ' -g ', num2str(bestg) , ' -s 3 -p 0.1'];
up_model = svmtrain(up, tsx, cmd);
% Predict the training windows and the next window
[up_predict,up_mse] = svmpredict(up,tsx,up_model);
up_predict = mapminmax('reverse',up_predict,up_ps);
predict_up = svmpredict(1,win_num+1,up_model);
predict_up = mapminmax('reverse',predict_up,up_ps);
predict_up
%% Analysis of the Up regression result
figure;
hold on;
plot(Up,'b+');
plot(up_predict,'r*');
hold off;
legend('original up','predict up','Location','NorthWest');
title('original vs predict','FontSize',12);
grid on;
figure;
up_err = up_predict - Up';
plot(up_err,'ro');
title('误差(predicted data-original data)','FontSize',12);
grid on;
toc;
% snapnow;
%% 子函数 SVMcgForRegress.m
function [mse,bestc,bestg] = SVMcgForRegress(train_label,train,cmin,cmax,gmin,gmax,v,cstep,gstep,msestep,flag)
% SVMcgForRegress  Grid search of the SVM parameters c and g for regression.
%
% For every point of a log2-spaced (c, g) grid this calls
%   svmtrain(train_label, train, '-v <v> -c <c> -g <g> -s 3')
% and stores the returned value in the grid cg (the original comments call
% it the cross-validation MSE). The pair with the smallest value wins; among
% equal values the one with the smaller c is kept. A contour plot and a 3-D
% mesh of the normalised grid are then drawn in a new figure.
%
% Inputs:
%   train_label : training targets, in the format the libsvm toolbox expects.
%   train       : training set, in the format the libsvm toolbox expects.
%   cmin, cmax  : range of log2(c), i.e. c runs from 2^cmin to 2^cmax.
%                 Defaults -5 and 5.
%   gmin, gmax  : range of log2(g), i.e. g runs from 2^gmin to 2^gmax.
%                 Defaults -5 and 5.
%   v           : value passed to svmtrain's -v (cross validation) option.
%                 Default 3.
%   cstep       : grid step of log2(c). Default 1.
%   gstep       : grid step of log2(g). Default 1.
%   msestep     : spacing of the contour levels (0:msestep:0.5). Default 0.1.
%   flag        : accepted for compatibility; it only controlled a print-to-
%                 file step that is commented out below. Default 0.
% Outputs:
%   mse   : smallest value returned by svmtrain over the grid.
%   bestc : c at which it was obtained.
%   bestg : g at which it was obtained.

% Each trailing argument gets its own default, so every argument count from
% 2 to 11 is valid. (Previously cstep/gstep were left undefined when exactly
% 7 or 8 arguments were supplied.)
if nargin < 11
    flag = 0; %#ok<NASGU> only referenced by the commented-out print below
end
if nargin < 10
    msestep = 0.1;
end
if nargin < 9
    gstep = 1;
end
if nargin < 8
    cstep = 1;
end
if nargin < 7
    v = 3;
end
if nargin < 6
    gmax = 5;
end
if nargin < 5
    gmin = -5;
end
if nargin < 4
    cmax = 5;
end
if nargin < 3
    cmin = -5;
end

% X: log2(c), Y: log2(g), cg: value returned by svmtrain at each grid point
[X,Y] = meshgrid(cmin:cstep:cmax,gmin:gstep:gmax);
[m,n] = size(X);
cg = zeros(m,n);

% Record the result for every (c, g) and keep the best one,
% preferring the smaller c on ties.
bestc = 0;
bestg = 0;
mse = Inf;
basenum = 2;
for i = 1:m
    for j = 1:n
        c = basenum^X(i,j);
        g = basenum^Y(i,j);
        cmd = ['-v ',num2str(v),' -c ',num2str(c),' -g ',num2str(g),' -s 3'];
        cg(i,j) = svmtrain(train_label, train, cmd);
        if cg(i,j) < mse || ( cg(i,j) == mse && bestc > c )
            mse = cg(i,j);
            bestc = c;
            bestg = g;
        end
    end
end

% Draw the result over the (c, g) grid, rescaled to [0,1].
% NOTE(review): mapminmax appears to normalise each row of a matrix
% independently, so rows of cg (fixed g) would be scaled separately --
% confirm that this is the intended picture.
cg = mapminmax(cg,0,1);
figure;
subplot(1,2,1);
[C,h] = contour(X,Y,cg,0:msestep:0.5);
clabel(C,h,'FontSize',10,'Color','r');
xlabel('log2c','FontSize',12);
ylabel('log2g','FontSize',12);
title('参数选择结果图(等高线图)','FontSize',12);
grid on;
subplot(1,2,2);
meshc(X,Y,cg);
% mesh(X,Y,cg);
% surf(X,Y,cg);
axis([cmin,cmax,gmin,gmax,0,1]);
xlabel('log2c','FontSize',12);
ylabel('log2g','FontSize',12);
zlabel('MSE','FontSize',12);
title('参数选择结果图(3D视图)','FontSize',12);
% filename = ['c',num2str(bestc),'g',num2str(bestg),num2str(msestep),'.tif'];
% if flag == 1;
% print('-dtiff','-r600',filename);
% end
%% 子函数 FIG_D.m
function [low,R,up]=FIG_D(XX,MFkind,win_num)
% FIG_D  Fuzzy information granulation of a data vector.
%
% Splits XX into win_num consecutive windows of floor(numel(XX)/win_num)
% samples (the last window also takes the remainder) and computes, for each
% window, a lower bound, a core and an upper bound.
%
% Inputs:
%   XX      : data vector (row or column).
%   MFkind  : 'trapezoid' (default), 'triangle', 'asygauss' or 'asyparabola'.
%   win_num : number of windows, a positive integer not larger than
%             numel(XX). Default 10.
% Outputs:
%   low : 1-by-win_num lower bounds.
%   R   : cores; 1-by-win_num (the window median) for every kind except
%         'trapezoid', where it is 2-by-win_num holding the two middle
%         order statistics of each window.
%   up  : 1-by-win_num upper bounds.
%
% An unknown MFkind or an invalid win_num raises an error.

if nargin < 3
    win_num = 10;
end
if nargin < 2
    MFkind = 'trapezoid';
end

% Fail early with a clear message instead of leaving the outputs unassigned.
if ~any(strcmp(MFkind,{'trapezoid','triangle','asygauss','asyparabola'}))
    error('FIG_D:unknownMFkind', ...
        'MFkind must be ''trapezoid'', ''triangle'', ''asygauss'' or ''asyparabola''.');
end

% Work on a row vector so that column input is handled as well.
XX = XX(:).';
d2 = numel(XX);

if ~isscalar(win_num) || win_num < 1 || win_num ~= floor(win_num) || win_num > d2
    error('FIG_D:badWinNum', ...
        'win_num must be a positive integer no larger than the number of samples (%d).', d2);
end

%% several windows: granulate each window on its own
if win_num ~= 1
    k = floor(d2/win_num);
    for i = 1:win_num
        first = 1 + (i-1)*k;
        if i < win_num
            last = i*k;
        else
            last = d2;      % the last window absorbs the remainder
        end
        % Every kind recurses into FIG_D itself (the 'asygauss' and
        % 'asyparabola' branches used to call FIG_P, which this file
        % does not define).
        [l,r,u] = FIG_D(XX(first:last),MFkind,1);
        if i == 1
            low = zeros(1,win_num,class(l));
            R = zeros(numel(r),win_num,class(r));
            up = zeros(1,win_num,class(u));
        end
        low(i) = l;
        R(:,i) = r;
        up(i) = u;
    end
    return;
end

%% single window
X = sort(XX);
switch MFkind
    % trapezoid
    case 'trapezoid'
        if mod(d2,2) ~= 0
            mflag = (d2+1)/2;
            nflag = (d2+1)/2;
        else
            mflag = d2/2;
            nflag = (d2+2)/2;
        end
        m = X(mflag);
        n = X(nflag);
        R = [m; n];
        k1 = mflag;
        k2 = d2 - nflag + 1;
        c1 = ( sum(X(1:k1)) )/k1;
        c2 = ( sum(X(nflag:d2)) )/k2;
        low = 2*c1 - m;
        up = 2*c2 - n;
    % triangle
    case 'triangle'
        R = median(X);
        % Same split as the trapezoid branch: the lower half is X(1:mflag)
        % and the upper half is X(nflag:d2); for odd d2 the middle sample
        % belongs to both. (floor(d2/2)/ceil(d2/2) left the median out of
        % the lower half for odd d2, put a below-median sample into the
        % upper half for even d2, and gave 0/0 for d2 == 1.)
        mflag = floor((d2+1)/2);
        nflag = ceil((d2+1)/2);
        k1 = mflag;
        k2 = d2 - nflag + 1;
        c1 = ( sum(X(1:k1)) )/k1;
        c2 = ( sum(X(nflag:d2)) )/k2;
        low = 2*c1 - R;
        up = 2*c2 - R;
    % asygauss
    case 'asygauss'
        % Original author's note: this is the same as the Pedrycz-based
        % version, because the Gaussian kernel cannot be modified.
        % NOTE(review): the candidate ranges 1:(mflag-1) and (nflag+1):d2
        % are not symmetric about the median for odd d2 -- confirm intent.
        R = median(X);
        m = R;
        n = R;
        mflag = floor(d2/2);
        nflag = ceil(d2/2);
        % lower bound: candidate a maximising Qa
        a_final = 0;
        Qa_final = 0;
        x = X( 1:(mflag-1) );
        for index = 1:( mflag-1 )
            a = X(index);
            y = (x<=m).*(exp(-(x-m).^2/a^2) );
            Qa = sum(y)/(m-a);
            if Qa>=Qa_final
                Qa_final = Qa;
                a_final = a;
            end
        end
        low = a_final;
        % upper bound: candidate b maximising Qb
        b_final = 0;
        Qb_final = 0;
        x = X( (nflag+1):d2 );
        for index = ( nflag+1 ):d2
            b = X(index);
            y = (x>=m).*(exp(-(x-m).^2/b^2) );
            Qb = sum(y)/(b-n);
            if Qb>=Qb_final
                Qb_final = Qb;
                b_final = b;
            end
        end
        up = b_final;
    % asyparabola
    case 'asyparabola'
        % NOTE(review): same asymmetric candidate ranges as 'asygauss'.
        R = median(X);
        m = R;
        n = R;
        mflag = floor(d2/2);
        nflag = ceil(d2/2);
        % lower bound: candidate a maximising Qa
        a_final = 0;
        Qa_final = 0;
        x = X( 1:(mflag-1) );
        for index = 1:( mflag-1 )
            a = X(index);
            y = (x<=m).*(1-(m-x).^2/(m-a)^2);
            Qa = sum(y)/(m-a);
            if Qa>=Qa_final
                Qa_final = Qa;
                a_final = a;
            end
        end
        low = a_final;
        % upper bound: candidate b maximising Qb
        b_final = 0;
        Qb_final = 0;
        x = X( (nflag+1):d2 );
        for index = ( nflag+1 ):d2
            b = X(index);
            y = (x>=m).*(1-(m-x).^2/(m-b)^2);
            Qb = sum(y)/(b-n);
            if Qb>=Qb_final
                Qb_final = Qb;
                b_final = b;
            end
        end
        up = b_final;
end
% 如果需要数据的话麻烦私信我,谢谢。