Modeling - MATLAB Algorithms

Algorithm Types

PCA Algorithm

Dimensionality Reduction Analysis

clc
clear
p_threshold = 0.9;               % cumulative contribution rate threshold
data = [149.3,4.2,108.1;161.2,4.1,114.8
        171.5,6.1,123.2;175.5,3.1,126.9
        180.8,8.1,132.1;110.7,2.2,137.7
        202.1,2.1,146.0;212.4,5.6,154.1
        226.1,5.0,162.3;231.9,5.1,164.3
        239.0,0.7,167.6];        % input raw data (11 samples x 3 features)
[m,n] = size(data);              % m = 11 samples, n = 3 features
% standardize the data (zero mean, unit variance per feature)
normal_data = (data - repmat(mean(data),m,1)) ./ repmat(std(data),m,1);
% compute the covariance matrix of the standardized data
sigama = cov(normal_data);
% eigenvalues and eigenvectors of the covariance matrix
[V,lamadas] = eig(sigama);
% collect the eigenvalues into a row vector (column sums of the diagonal eigenvalue matrix)
lamada = sum(lamadas);
% sort the eigenvalues in descending order and record the matching eigenvector order
[order_lamada,index] = sort(-lamada);
order_lamada = -order_lamada;
% determine the number of principal components from the cumulative contribution rate
for i = 1:length(lamada)
    P = sum(order_lamada(1:i)) / sum(order_lamada);   % cumulative contribution rate
    if P > p_threshold
        break
    end
end
num_pca = i;
% extract the eigenvectors of the retained principal components
V_main = V(:,index(1:i));
% compute the principal component scores
new_score = normal_data * V_main;
% display the results
disp('Eigenvalues and cumulative contribution rate:')
order_lamada,P
disp('Number of principal components and their eigenvectors:')
num_pca
V_main
% plot the standardized data and its projection onto the retained principal components
plot3(normal_data(:,1),normal_data(:,2),normal_data(:,3),'b*')
hold on
% back-project onto the principal subspace (reconstruction from the retained components)
new_data = (V_main*V_main'*normal_data')';
plot3(new_data(:,1),new_data(:,2),new_data(:,3),'r--o')
xlabel('X'); ylabel('Y'); zlabel('Z');
title('Standardized data vs. principal component projection')
legend('Standardized data','Principal component projection')
% draw the principal axes as arrows from the origin (assumes at least two components were retained)
h = quiver3(0,0,0,V_main(1,1),V_main(2,1),V_main(3,1),2,'k','linewidth',1.5);
set(h,'maxheadsize',7);
h = quiver3(0,0,0,V_main(1,2),V_main(2,2),V_main(3,2),2,'k','linewidth',1.5);
set(h,'maxheadsize',4);
hold off
view(-27,10)
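As a quick sanity check (a sketch assuming the Statistics and Machine Learning Toolbox is available), the hand-rolled eigendecomposition above can be compared against MATLAB's built-in pca function; the columns of coeff should match V_main up to sign, and cumsum(explained) reproduces the cumulative contribution rate in percent.

% Optional cross-check with the built-in pca (Statistics and Machine Learning Toolbox).
% coeff holds the loadings, latent the eigenvalues, explained the percentage of
% variance per component; the sign of each column may differ from V_main.
[coeff,score,latent,~,explained] = pca(normal_data);
disp('Built-in pca eigenvalues:')
disp(latent')
disp('Cumulative contribution rate (%):')
disp(cumsum(explained)')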

Cluster Analysis

K-means Clustering
clc
clear
close all
% input raw data (30 samples, 2 features)
data =[0.697,0.460;0.774,0.376;0.634,0.264;0.608,0.318;0.556,0.215;0.403,0.237;
       0.481,0.149;0.437,0.211;0.666,0.091;0.243,0.267;0.245,0.057;0.343,0.099;
       0.639,0.161;0.657,0.198;0.360,0.370;0.593,0.042;0.719,0.103;0.359,0.188;
       0.339,0.241;0.282,0.257;0.748,0.232;0.714,0.346;0.483,0.312;0.478,0.437;
       0.525,0.369;0.751,0.489;0.532,0.472;0.473,0.376;0.725,0.445;0.446,0.459;];
% number of samples and number of features
[num,num_feature] = size(data);
% number of clusters
k = 3;
% min-max (range) normalization of each feature
max_feature = max(data);
min_feature = min(data);
range_feature = max_feature - min_feature;
normal_data=(data - repmat(min_feature,num,1))./ repmat(range_feature,num,1);
% randomly initialize the cluster centers (in the normalized feature space)
center = rand(k,num_feature);
% maximum number of iterations
max_items = 10; count = 1;
while ( count <= max_items )
       % squared Euclidean distance from every sample to each cluster center
       for  i = 1:k
           distance(i,:) = sum((normal_data - repmat(center(i,:),num,1))'.^2);
       end
       SSE = sum(min(distance))          % within-cluster sum of squared errors (printed each iteration)
       % assign each sample to its nearest cluster center
       [~,belong_class] = min(distance);
       % update each cluster center (skip empty clusters so the center does not become NaN)
       for i = 1:k
           if any(belong_class == i)
               center(i,:) = mean(normal_data(belong_class == i,:),1);
           end
       end
       % record the center positions at each iteration
       record_center(count,:) = reshape(center,1,k*num_feature);
       count = count + 1;
end
% map the cluster centers back to the original feature scale
center = repmat(min_feature,k,1) + center .* repmat(range_feature,k,1);
% plot the center trajectories to check whether the cluster centers have converged
plot(record_center,'linewidth',2)
xlabel('Iteration');
ylabel('Cluster center coordinates');
title('Convergence of the cluster centers');
% scatter plot of the samples, colored by cluster assignment
figure
colors  = ['r','b','k','g'];     % one color per cluster (avoids shadowing the built-in colormap)
markers = ['*','^','o','p'];     % marker 1 for samples, marker 2 for cluster centers
for i = 1:k
    plot(data(belong_class == i,1),data(belong_class == i,2),[colors(i),markers(1)])
    hold on
    plot(center(i,1),center(i,2),[colors(i),markers(2)])
end
xlabel('Feature 1'); ylabel('Feature 2'); title('Cluster assignment of the samples');
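For comparison (again a sketch assuming the Statistics and Machine Learning Toolbox), the same clustering can be reproduced with the built-in kmeans function on the normalized data; the labels in idx may be a permutation of belong_class, so only the recovered centers and the total SSE are directly comparable.

% Optional cross-check with the built-in kmeans on the same normalized data.
% 'Replicates' reruns k-means from several random starts and keeps the solution with the lowest SSE.
rng(1);                                          % fix the random seed for reproducibility
[idx,C,sumd] = kmeans(normal_data,k,'Replicates',5);
center_builtin = repmat(min_feature,k,1) + C .* repmat(range_feature,k,1);   % centers on the original scale
disp('Built-in k-means total SSE:')
disp(sum(sumd))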

Original post: blog.csdn.net/weixin_51552144/article/details/119355376