def(fileName): numFeat = len(open(fileName).readline().split('t')) - 1#get number of fields dataMat = []; labelMat = [] fr = open(fileName) for line in fr.readlines(): lineArr =[] curLine = line.strip().split('t') for i in range(numFeat): lineArr.append(float(curLine[i])) dataMat.append(lineArr) labelMat.append(float(curLine[-1])) return dataMat,labelMat
defstandRegres(xArr,yArr): xMat = mat(xArr); yMat = mat(yArr).T xTx = xMat.T*xMat if linalg.det(xTx) == 0.0: print"This matrix is singular, cannot do inverse" return ws = xTx.I * (xMat.T*yMat) return ws
局部加权线性回归
局部加权线性回归(Locally Weighted Linear Regression,LWLR)。在该算法中,我们给待预测点附近的每个点赋予一定的权重;然后在这个子集上基于最小均方差来进行普通的回归。与kNN一样,这种算法每次预测均需要事先选取出对应的数据子集。
deflwlrTest(testArr,xArr,yArr,k=1.0):#loops over all the data points and applies lwlr to each one m = shape(testArr)[0] yHat = zeros(m) for i in range(m): yHat[i] = lwlr(testArr[i],xArr,yArr,k) return yHat
defridgeRegres(xMat,yMat,lam=0.2): xTx = xMat.T*xMat denom = xTx + eye(shape(xMat)[1])*lam if linalg.det(denom) == 0.0: print"This matrix is singular, cannot do inverse" return ws = denom.I * (xMat.T*yMat) return ws defridgeTest(xArr,yArr): xMat = mat(xArr); yMat=mat(yArr).T yMean = mean(yMat,0) yMat = yMat - yMean #to eliminate X0 take mean off of Y #regularize X's xMeans = mean(xMat,0) #calc mean then subtract it off xVar = var(xMat,0) #calc variance of Xi then divide by it xMat = (xMat - xMeans)/xVar numTestPts = 30 wMat = zeros((numTestPts,shape(xMat)[1])) for i in range(numTestPts): ws = ridgeRegres(xMat,yMat,exp(i-10)) wMat[i,:]=ws.T return wMat