KNN:
function [result,tag]=KNN(X,Y,k)
%KNN  Impute one missing entry of a sample by distance-weighted k-nearest neighbours.
%
%   [result,tag] = KNN(X,Y,k)
%
%   Inputs
%     X   m-by-n matrix of reference samples, one sample per row.
%     Y   vector with (at least) n entries; missing entries are NaN.
%     k   number of nearest neighbours to use; clamped to m.
%
%   Outputs
%     result  weighted average of X(:,tag) over the k nearest rows of X.
%     tag     index of the imputed entry of Y. When Y holds several NaN
%             entries only the last one is imputed.
%
%   Distances are Euclidean over the non-NaN entries of Y. Neighbours are
%   weighted by inverse distance so that closer rows contribute more; rows
%   at distance zero (exact matches) share the whole weight equally.
%
%   Errors
%     KNN:noSamples       X has no rows.
%     KNN:badK            k is not a scalar >= 1.
%     KNN:noMissingValue  Y contains no NaN, so there is nothing to impute.

    [m, n] = size(X);
    if m == 0
        error('KNN:noSamples', 'X must contain at least one sample row.');
    end
    if ~(isscalar(k) && k >= 1)
        error('KNN:badK', 'k must be a scalar greater than or equal to 1.');
    end
    k = min(floor(k), m);              % never ask for more neighbours than rows

    Y = reshape(Y(1:n), 1, []);        % work on a 1-by-n row regardless of input shape
    missing = isnan(Y);
    tag = find(missing, 1, 'last');    % last NaN position
    if isempty(tag)
        error('KNN:noMissingValue', 'Y contains no NaN entry to impute.');
    end

    % Euclidean distance from Y to every row of X over the observed columns.
    observed = ~missing;
    delta = bsxfun(@minus, X(:, observed), Y(observed));
    dist = sqrt(sum(delta .^ 2, 2));

    % sort is stable, so ties keep the original row order.
    [sortedDist, order] = sort(dist);
    nearestDist = sortedDist(1:k);
    nearestRow = order(1:k);

    % Inverse-distance weights; exact matches take all the weight so that
    % no division by zero occurs.
    exact = (nearestDist == 0);
    if any(exact)
        weights = double(exact);
    else
        weights = 1 ./ nearestDist;
    end
    weights = weights / sum(weights);

    result = sum(weights .* X(nearestRow, tag));
end
EM:
function [result,tag1]=EM(X,Y,e)
%EM  Impute the missing entries of a sample with a 4-component, hard-assignment EM.
%
%   [result,tag1] = EM(X,Y,e)
%
%   Inputs
%     X   m-by-n matrix of reference samples, one sample per row (m >= 4).
%     Y   vector with n entries; missing entries are NaN.
%     e   tolerance forwarded unchanged to judge(), which decides
%         convergence (judge is defined outside this file).
%
%   Outputs
%     tag1    row vector with the indices of the NaN entries of Y.
%     result  row vector, same length as tag1; result(j) is the mean of the
%             component with the largest mixing weight fitted to X(:,tag1(j)).
%     When Y has no NaN entry both outputs are the scalar 0.
%
%   For every missing column a one-dimensional mixture of four Gaussians is
%   fitted to that column of X. The components are initialised from four
%   equal contiguous groups of rows (rows 1:40, 41:80, 81:120, 121:160 when
%   m is 160). Each iteration assigns every sample to its most likely
%   component and re-estimates weight, mean and variance from the members.
%   Iteration stops when judge(...) returns a value >= 1 or after 1000
%   iterations.

    K = 4;              % number of mixture components
    MAX_ITER = 1000;    % hard cap on the number of iterations
    VAR_FLOOR = eps;    % keeps a zero variance from producing NaN densities

    [m, n] = size(X);
    if m < K
        error('EM:tooFewSamples', 'X must contain at least %d rows.', K);
    end
    edges = round(linspace(0, m, K + 1));   % group c covers rows edges(c)+1 : edges(c+1)

    result = [];
    tag1 = [];
    for col = 1:n
        if ~isnan(Y(col))
            continue;                       % only missing positions are imputed
        end
        tag1(end + 1) = col;
        x = X(:, col);

        % Initial parameters: equal weights (reset for every column) and the
        % moments of the four contiguous row groups.
        A1 = repmat(1 / K, 1, K);
        u1 = zeros(1, K);
        b1 = zeros(1, K);
        for c = 1:K
            group = x(edges(c) + 1 : edges(c + 1));
            u1(c) = mean(group);
            b1(c) = max(var(group, 1), VAR_FLOOR);
        end

        % Parameters of the previous iteration, compared by judge().
        A = zeros(1, K);
        u = zeros(1, K);
        b = zeros(1, K);

        iter = 0;
        while iter < MAX_ITER && judge(A, A1, u, u1, b, b1, e) < 1
            iter = iter + 1;

            % Weighted Gaussian density of every sample under every
            % component (the constant 1/sqrt(2*pi) is common and omitted).
            dens = zeros(m, K);
            for c = 1:K
                dens(:, c) = A1(c) * exp(-((x - u1(c)) .^ 2) / 2 / b1(c)) / sqrt(b1(c));
            end
            [~, label] = max(dens, [], 2);  % most likely component per sample

            % Remember the current parameters before overwriting them.
            u = u1;
            b = b1;
            A = A1;

            % Re-estimate from the hard assignment. An empty component keeps
            % its previous mean and variance and gets weight 0.
            for c = 1:K
                members = x(label == c);
                A1(c) = numel(members) / m;
                if ~isempty(members)
                    u1(c) = mean(members);
                    b1(c) = max(var(members, 1), VAR_FLOOR);
                end
            end
        end

        % Imputed value: mean of the component holding the most samples.
        [~, best] = max(A1);
        result(end + 1) = u1(best);
    end

    % Keep the historical return value for a sample without missing entries.
    if isempty(tag1)
        tag1 = 0;
        result = 0;
    end
end
fun:
% Demo script: hide one known value in each of eight held-out samples,
% impute it with EM and with KNN, and plot both estimates next to the
% true values.
clc;
clear all;
load('totalPDSdat.mat');

X = totalPDSdat(1:160, :);              % reference samples
hiddenCols = [2 3 4 5 13 20 23 24];     % column hidden in held-out rows 161..168
nHeldOut = numel(hiddenCols);

% Y holds the held-out rows with the chosen entry replaced by NaN
% (treated as missing data); A keeps the true values that were hidden.
Y = totalPDSdat(161:168, :);
for r = 1:nHeldOut
    A(r) = totalPDSdat(160 + r, hiddenCols(r));
    Y(r, hiddenCols(r)) = NaN;
end

% Z is Y with the EM estimates filled in; B and C collect the EM and KNN
% estimates for plotting.
Z = Y;
for r = 1:nHeldOut
    sample = Y(r, :);

    [emValues, emCols] = EM(X, sample, 0.001);
    for t = 1:size(emValues, 2)
        Z(r, emCols(t)) = emValues(t);
        B(r) = emValues(t);
    end

    C(r) = KNN(X, sample, 10);
end

% True values in red, EM in black, KNN in blue.
P = 1:nHeldOut;
scatter(P, A, 'r', 'filled');
hold on;
scatter(P, B, 'k', 'filled');
scatter(P, C, 'b', 'filled');
axis([1,8, 0,210]);
legend('原始数据','EM算法','KNN算法');