KNN machine learning algorithm implemented from scratch

import numpy as np
import pandas as pd
 . 3  
. 4  
def build_data():
    """Load the movie classification dataset.

    Reads the bundled Excel file from the current working directory.
    NOTE(review): file name reconstructed from a garbled source — confirm
    it matches the actual file on disk.

    :return: pandas DataFrame with the raw movie data
    """
    # 1. Load the data
    data = pd.read_excel("./Movie classification data.xlsx")
    return data
 14  
15  
def distance(v1, v2):
    """Compute the Euclidean distance between two points.

    :param v1: point 1 (array-like of numbers)
    :param v2: point 2 (array-like of numbers, same length as v1)
    :return: the scalar Euclidean distance between v1 and v2
    """
    # Vectorized: sqrt(sum((v1 - v2) ** 2)) in a single NumPy expression.
    dist = np.sqrt(np.sum(np.power(v1 - v2, 2)))
    return dist
 35  
36  
def knn_owns(train, test, k):
    """Self-implemented KNN: classify one test sample against a training set.

    Computes the distance from the test sample to every training row, then
    takes the mode of the "Genre" column among the k nearest rows.

    NOTE(review): column names ("funny lens", "fight scene", "Genre")
    reconstructed from a garbled source — confirm against the Excel file.
    Side effect: adds/overwrites a "dist" column on `train` in place.

    :param train: training set (DataFrame; first column is the movie name,
                  feature columns span "funny lens" through "fight scene")
    :param test: test sample (Series; element 0 is the movie name)
    :param k: number of neighbors to consider
    :return: Series with the modal genre(s) among the k nearest neighbors
             (original only printed it; returning is backward-compatible)
    """
    # Distance from the test sample to every training sample.
    for index in train.index:
        dist = distance(test[1:].values,
                        train.loc[index, "funny lens":"fight scene"].values)
        train.loc[index, "dist"] = dist

    # Sort ascending by distance, keep the k closest, take the modal genre.
    res = train.sort_values(by="dist").head(k)["Genre"].mode()

    print(res)
    return res
 57 is  
58  
def main():
    """Drive the demo: load data, split it, and run the self-built KNN.

    NOTE(review): the train/test split below is reconstructed from a garbled
    source (the original showed unreadable slice indices); the last row is
    assumed to be the single unlabeled sample to classify — confirm against
    the data file.

    :return: None
    """
    # 1. Load the data
    data = build_data()
    print("data:\n", data)

    # 2. Split the dataset
    # Training set: every row except the last.
    train = data.iloc[:-1, :]
    print("train:\n", train)
    # Test set: the last row, as a Series (name + feature values).
    test = data.iloc[-1, :]
    print("test:\n", test)

    # 3. Run the self-implemented KNN with a user-chosen k.
    # Different numbers of neighbors k give different results.
    k = int(input("Please enter the number of neighbors: "))
    knn_owns(train, test, k)
 79  
80  
if __name__ == '__main__':
    main()

# Hyperparameter: a value (here k) chosen before training; it can be set to
# different values to get different results, but does not change during training.
# Hyperparameter tuning: try different hyperparameter values and keep the one
# that gives the best result — that value is the optimal hyperparameter.

Related posts

Origin: www.cnblogs.com/Tree0108/p/12116175.html