在网易公开课上看了吴恩达的课,自己用 Python 实现了一遍。
import numpy as np

# Number of latent features used for the factor matrices built in main().
R = 2
# Gradient-descent step size used by new_x_and_theta().
alpha = 0.01
# Regularisation strength shared by calc_loss() and new_x_and_theta().
beta = 0.5


def norm_dataset(data_set, bool_set):
    """Mean-normalise every column of ``data_set`` in place.

    For each column ``j`` the mean of the entries flagged in ``bool_set`` is
    computed and subtracted from those entries; entries that are not flagged
    are overwritten with ``-1.0``.

    Args:
        data_set: Mutable 2-D sequence (list of lists) of scores. Modified
            in place.
        bool_set: 2-D sequence of the same shape; truthy where the matching
            score is present.

    Returns:
        List with one mean per column. A column without any flagged entry
        gets a mean of ``0.0`` (the original code divided by zero there).
    """
    miu = []
    if not data_set:
        return miu

    n_rows = len(data_set)
    for j in range(len(data_set[0])):
        rated_rows = [i for i in range(n_rows) if bool_set[i][j]]
        if rated_rows:
            total = sum(float(data_set[i][j]) for i in rated_rows)
            mean = total / len(rated_rows)
        else:
            # Nothing to average: fall back to 0.0 instead of dividing by 0.
            mean = 0.0
        miu.append(mean)

        for i in range(n_rows):
            if bool_set[i][j]:
                data_set[i][j] = data_set[i][j] - mean
            else:
                data_set[i][j] = -1.0
    return miu


def _masked_residuals(x, theta, data_set, bool_set):
    """Return ``(x . theta - data_set)`` zeroed where unrated, plus the mask."""
    rated = np.asarray(bool_set, dtype=bool)
    targets = np.asarray(data_set, dtype=float)
    residuals = np.where(rated, np.dot(x, theta) - targets, 0.0)
    return residuals, rated


def calc_loss(x, theta, data_set, bool_set):
    """Return the regularised objective minimised by ``new_x_and_theta``.

    For every flagged entry ``(i, j)`` the contribution is::

        0.5 * (x[i] . theta[:, j] - data_set[i][j]) ** 2
        + 0.5 * beta * (||x[i]||^2 + ||theta[:, j]||^2)

    The ``0.5`` on the penalty makes its derivative ``beta * x`` /
    ``beta * theta``, which is exactly what ``new_x_and_theta`` applies.

    Args:
        x: Array of shape (rows, features).
        theta: Array of shape (features, columns).
        data_set: 2-D scores, shape (rows, columns).
        bool_set: 2-D flags of the same shape; truthy where a score exists.

    Returns:
        The loss as a Python float (``0.0`` when nothing is flagged).
    """
    x = np.asarray(x, dtype=float)
    theta = np.asarray(theta, dtype=float)
    residuals, rated = _masked_residuals(x, theta, data_set, bool_set)

    row_norms = np.sum(x * x, axis=1)
    col_norms = np.sum(theta * theta, axis=0)
    # The penalty is counted once per flagged entry, mirroring the gradient.
    penalty = np.sum(rated * (row_norms[:, None] + col_norms[None, :]))
    return float(0.5 * np.sum(residuals ** 2) + 0.5 * beta * penalty)


# NOTE: this name shadows the builtin ``eval``; it is kept unchanged so that
# existing callers keep working.
def eval(x, theta, data_set, bool_set):  # noqa: A001
    """Return half the summed squared error over the flagged entries.

    Same arguments as ``calc_loss``; no regularisation term is included.
    """
    residuals, _ = _masked_residuals(
        np.asarray(x, dtype=float), np.asarray(theta, dtype=float),
        data_set, bool_set,
    )
    return float(0.5 * np.sum(residuals ** 2))


def new_x_and_theta(x, theta, data_set, bool_set):
    """Perform one gradient-descent step on ``x`` and ``theta`` in place.

    Both gradients are computed from the values held before the step, then
    both arrays are updated with step size ``alpha``.

    Only flagged entries contribute, including to the regularisation term,
    so a row or column without any flagged entry is left untouched.

    Args:
        x: Float array of shape (rows, features). Updated in place.
        theta: Float array of shape (features, columns). Updated in place.
        data_set: 2-D scores, shape (rows, columns).
        bool_set: 2-D flags of the same shape; truthy where a score exists.

    Returns:
        None.
    """
    residuals, rated = _masked_residuals(x, theta, data_set, bool_set)
    per_row = rated.sum(axis=1)
    per_col = rated.sum(axis=0)

    # Compute both gradients before touching either array.
    x_partial = np.dot(residuals, theta.T) + beta * per_row[:, None] * x
    theta_partial = np.dot(x.T, residuals) + beta * per_col[None, :] * theta

    x -= alpha * x_partial
    theta -= alpha * theta_partial


def main():
    """Fit the demo score table and print the factors and predictions."""
    # -1 marks a missing score.
    data_set = [
        [5, 5, 0, 0],
        [5, -1, -1, 0],
        [-1, 4, 0, -1],
        [0, 0, 5, 4],
        [0, 0, 5, -1],
        [-1, -1, -1, -1],
    ]
    bool_set = [[score != -1 for score in row] for row in data_set]
    nu = len(data_set)
    nm = len(data_set[0])

    x = np.random.random(size=(nu, R))
    theta = np.random.random(size=(R, nm))
    miu = norm_dataset(data_set, bool_set)
    print(data_set)
    print(miu)

    # Keep the factors with the lowest loss seen before each step.
    min_loss = float("inf")
    final_x = x.copy()
    final_theta = theta.copy()
    for _ in range(1000):
        loss = calc_loss(x, theta, data_set, bool_set)
        if loss < min_loss:
            min_loss = loss
            final_x = x.copy()
            final_theta = theta.copy()
        new_x_and_theta(x, theta, data_set, bool_set)

    print(final_x)
    print(final_theta)
    print(eval(final_x, final_theta, data_set, bool_set))

    # Predictions are made on normalised data, so add the column means back.
    result = np.dot(final_x, final_theta) + np.asarray(miu)

    # Undo the normalisation on the known scores for display.
    for i in range(nu):
        for j in range(nm):
            if bool_set[i][j]:
                data_set[i][j] = data_set[i][j] + miu[j]
    print(result)
    print(data_set)


if __name__ == '__main__':
    main()