10 lines of code to implement KNN (k-nearest neighbors)

1. K-nearest neighbors summarized in one sentence: he who stays near vermilion turns red, he who stays near ink turns black (a sample takes the class of whatever it is close to) + majority voting

    Not much to say; on to the code.
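
Before the real dataset, a toy sketch of that one-sentence idea (the three neighbor labels below are invented for illustration): collect the labels of the k nearest neighbors, then let the majority win.

neighbor_labels = [0, 1, 1]   # made-up labels of the 3 nearest neighbors
prediction = max(set(neighbor_labels), key=neighbor_labels.count)   # majority vote
print(prediction)   # -> 1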


2. Generate a dataset with sklearn

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

n_samples = 5000
centers = [(-1, -1), (5, 5)]   # two well-separated clusters
X, y = make_blobs(n_samples=n_samples, n_features=2, cluster_std=1.0,
                  centers=centers, shuffle=False, random_state=42)

y[:n_samples // 2] = 0         # first cluster -> class 0
y[n_samples // 2:] = 1         # second cluster -> class 1
sample_weight = np.random.RandomState(42).rand(y.shape[0])   # generated here but not used by the KNN below
X_train, X_test, y_train, y_test, sw_train, sw_test = \
    train_test_split(X, y, sample_weight, test_size=0.1, random_state=42)
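
A quick sanity check of the split (not part of the original 10 lines; the expected shapes follow from n_samples=5000 and test_size=0.1):

print(X_train.shape, X_test.shape)   # (4500, 2) (500, 2)
print(np.bincount(y_train))          # the two classes are roughly balanced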


3. Classification - KNN

k = 500

def distance(x, X):                      # Euclidean distance from one test point to every training sample
    return np.sqrt(((X - x) ** 2).sum(axis=1))

def result(dist, k):                     # majority vote among the k nearest neighbors
    index = dist.argsort()               # indices of training samples, sorted by distance
    index = index[:k]                    # keep the k nearest
    out = y_train[index].tolist()
    return out.count(0) < k - out.count(0)   # True (i.e. 1) if class 1 outvotes class 0

sum1 = 0
for i in range(len(X_test)):
    dist = distance(X_test[i], X_train)
    sum1 = sum1 + np.equal(y_test[i], result(dist, k))   # count correct predictions
print(float(sum1 / len(X_test)))         # accuracy on the test set
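
As a cross-check (not part of the 10 lines), the same split can be fed to scikit-learn's KNeighborsClassifier; its score should roughly match the accuracy printed above.

from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=k)   # same k as above
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))            # accuracy on the same test set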
 
 

4. Plotting the result

    

plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train)   # training points, colored by class
plt.scatter(X_test[:, 0], X_test[:, 1], c='b')         # test points in blue
plt.show()
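
A possible variant (my own assumption, not in the original): color the test points by their predicted class instead of plain blue, reusing distance() and result() from step 3.

preds = [int(result(distance(x, X_train), k)) for x in X_test]   # predicted class per test point
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, alpha=0.3)  # training points, faded
plt.scatter(X_test[:, 0], X_test[:, 1], c=preds, marker='x')     # test points, colored by prediction
plt.show()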


5. Expansion--kd tree storage and search

class Node:
    def __init__(self, data, lchild=None, rchild=None):
        self.data = data
        self.lchild = lchild
        self.rchild = rchild

    def create(self, dataSet, depth):   # create a kd tree and return the root node
        if len(dataSet) > 0:
            m, n = np.shape(dataSet)    # number of samples and number of features
            midIndex = m // 2           # index of the median element
            axis = depth % n            # axis used to split at this depth, corresponding to formula (2) of Algorithm 3.2 in the book
            sortedDataSet = self.sort(dataSet, axis)   # sort along the chosen axis
            node = Node(sortedDataSet[midIndex])       # the node stores the median sample; see the book for details
            leftDataSet = sortedDataSet[:midIndex]     # samples to the left of the median
            rightDataSet = sortedDataSet[midIndex + 1:]
            node.lchild = self.create(leftDataSet, depth + 1)    # recursively build the tree from the left samples
            node.rchild = self.create(rightDataSet, depth + 1)
            return node
        else:
            return None

    def sort(self, dataSet, axis):      # bubble sort, using axis as the splitting axis
        sortDataSet = dataSet[:]        # work on a copy so the original samples are not destroyed
        m, n = np.shape(sortDataSet)
        for i in range(m):
            for j in range(0, m - i - 1):
                if sortDataSet[j][axis] > sortDataSet[j + 1][axis]:
                    sortDataSet[j], sortDataSet[j + 1] = sortDataSet[j + 1], sortDataSet[j]
        return sortDataSet

    def preOrder(self, node):
        if node is not None:
            print("node -> %s" % node.data)
            self.preOrder(node.lchild)
            self.preOrder(node.rchild)

    # kd tree search
    def search(self, tree, x):          # search for the nearest neighbor of x
        self.nearestPoint = None        # the nearest point found so far
        self.nearestValue = 0           # its distance to x

        def travel(node, depth=0):      # recursive search
            if node is not None:        # recursion terminates at an empty subtree
                n = len(x)              # number of features
                axis = depth % n        # splitting axis at this depth
                if x[axis] < node.data[axis]:   # if the target lies left of the node, descend into the left child
                    travel(node.lchild, depth + 1)
                else:
                    travel(node.rchild, depth + 1)

                # from here the recursion unwinds, corresponding to Algorithm 3.3(3)
                distNodeAndX = self.dist(x, node.data)   # distance between the target and this node
                if self.nearestPoint is None:            # first candidate: update the nearest point and value, Algorithm 3.3(3)(a)
                    self.nearestPoint = node.data
                    self.nearestValue = distNodeAndX
                elif self.nearestValue > distNodeAndX:
                    self.nearestPoint = node.data
                    self.nearestValue = distNodeAndX

                print(node.data, depth, self.nearestValue, node.data[axis], x[axis])
                if abs(x[axis] - node.data[axis]) <= self.nearestValue:   # does the current best sphere cross the splitting plane? If so, also search the other subtree, Algorithm 3.3(3)(b)
                    if x[axis] < node.data[axis]:
                        travel(node.rchild, depth + 1)
                    else:
                        travel(node.lchild, depth + 1)

        travel(tree)
        return self.nearestPoint

    def dist(self, x1, x2):   # Euclidean distance
        return ((np.array(x1) - np.array(x2)) ** 2).sum() ** 0.5
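
A minimal usage sketch for the class above (the sample points are invented for illustration; the placeholder instance is only needed because the methods live on Node):

dataSet = [[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]]
helper = Node(None)                    # placeholder instance, used only to call the methods
tree = helper.create(dataSet, 0)       # build the kd tree
helper.preOrder(tree)                  # print it in pre-order
print(helper.search(tree, [3, 4.5]))   # nearest neighbor of the query -> [2, 3]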

