KNN----python
def euclidean_dist_squared(X, Xtest):
    """Return all pairwise squared Euclidean distances between two row sets.

    Uses the expansion ``||x - z||^2 = ||x||^2 + ||z||^2 - 2 * x.z`` so the
    whole matrix is produced by one matrix product instead of a double loop.

    Parameters
    ----------
    X : array-like, one point per row.
    Xtest : array-like, one point per row, with the same number of columns
        as ``X``.

    Returns
    -------
    numpy.ndarray
        Matrix ``D`` with one row per row of ``X`` and one column per row of
        ``Xtest``; ``D[i, j]`` is the squared distance between ``X[i]`` and
        ``Xtest[j]``.
    """
    # Accept plain nested lists as well as arrays.
    X = np.asarray(X)
    Xtest = np.asarray(Xtest)

    x_sq_norms = np.sum(X**2, axis=1)[:, None]      # column vector
    t_sq_norms = np.sum(Xtest**2, axis=1)[None]     # row vector
    return x_sq_norms + t_sq_norms - 2 * np.dot(X, Xtest.T)
# -*- coding: utf-8 -*-
"""
Created on Fri May  4 09:47:05 2018

@author: sun_y
"""
import numpy as np
from scipy import stats
import utils


class my_knn:
    """k-nearest-neighbours classifier that simply memorises its training set."""

    def __init__(self, k):
        """Store ``k``, the number of neighbours consulted per prediction."""
        self.k = k

    def fit(self, X, y):
        """Memorise the training data; all real work happens in ``predict``."""
        self.X = X
        self.y = y

    def predict(self, Xtest):
        """Predict a label for every row of ``Xtest``.

        For each test row, the ``k`` training rows with the smallest squared
        Euclidean distance are looked up and ``utils.mode`` of their labels
        is used as the prediction.

        Returns a float array with one entry per row of ``Xtest``.

        Raises ``ValueError`` if ``k`` is larger than the number of
        training rows.
        """
        # Kept as an attribute because the original implementation did so.
        self.Xtest = Xtest
        t = self.Xtest.shape[0]

        # D[i, j]: squared distance from test row i to training row j.
        D = utils.euclidean_dist_squared(self.Xtest, self.X)

        n_train = D.shape[1]
        if self.k > n_train:
            raise ValueError(
                "k=%d exceeds the number of training samples (%d)"
                % (self.k, n_train)
            )

        # argsort works along the last axis, i.e. per test row.
        nearest_idx = D.argsort()[:, 0:self.k]

        # One vectorised gather replaces the element-by-element copy. Labels
        # are handled as floats, matching the float buffer used previously.
        ynear = np.asarray(self.y, dtype=float)[nearest_idx]

        yhat = np.zeros(t)
        for i in range(t):
            # utils.mode presumably returns the most frequent value in the
            # row -- confirm against utils.
            yhat[i] = utils.mode(ynear[i, :])
        return yhat
【定义数组】
c=np.array([[1,2],[3,4]])
【numpy.shape】 c.shape[0]=2 np.shape(c)=(2,2)
【numpy.sort】 c.sort() (原地排序)
c.argsort() (返回排序后各元素的下标)
***************************************************************************************************************************
【load CSV file】
import os  # os module (the original post linked to its documentation here)

import pandas as pd  # pandas module (the original post linked to its documentation here)

# Read fluTrends.csv from the author's local assignment data folder.
df = pd.read_csv(os.path.join('E:\\assignment 2018 with code\\p2o1b_a1-master\\p2o1b_a1-master\\data','fluTrends.csv'))
import numpy as np

try:
    # Current home of train_test_split.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for old scikit-learn releases that only have cross_validation.
    from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_iris

from my_knn import my_knn

iris = load_iris()
n, d = iris.data.shape

# Inspection calls left disabled. Note that n_samples / n_features are not
# defined in this script; the shape is held in n and d above.
# print(iris.keys())
# print((n_samples, n_features))
# print(iris.data[0])
# print(iris.target.shape)
# print(iris.target)
# print(iris.target_names)
# print("feature_names:",iris.feature_names)

# Hold out 10% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)

KValue = 4
model = my_knn(k=KValue)
model.fit(X_train, y_train)

# Error on the data the model has memorised.
y_pred = model.predict(X_train)
train_error = np.mean(y_pred != y_train)
print("train error is %f" % train_error)

# Error on the held-out split.
y_pred_test = model.predict(X_test)
test_error = np.mean(y_pred_test != y_test)
print("test error is %f" % test_error)
【测试结果:】
k=4时
train error is 0.029630 test error is 0.066667
***************************************分割线******************************************************************
【python 自带KNN】
import numpy as np
from sklearn import neighbors

# X_train, X_test, y_train and y_test are not defined in this snippet; they
# must already exist in the session before it is run.

# Get the KNN classifier. No arguments are passed, so the library's default
# settings (including its default number of neighbours) are used.
model2 = neighbors.KNeighborsClassifier()
model2.fit(X_train, y_train)

y_pred = model2.predict(X_train)
train_error = np.mean(y_pred != y_train)
print("train error is %f" % train_error)

y_pred_test = model2.predict(X_test)
test_error = np.mean(y_pred_test != y_test)
print("test error is %f" % test_error)
train error is 0.022222 test error is 0.066667
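测试误差一致(均为 0.066667);训练误差略有差异(0.029630 对 0.022222)。注意 KNeighborsClassifier() 未指定 k,使用的是库的默认值,与上面的 k=4 不一定相同。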
【返回 true 和 false】
return True # 注意大写