k-means pseudocode

1. Initialize k cluster centers.

2. Update the cluster assignment of every sample point: each sample is assigned to the cluster whose center point is nearest to it.

3. Recalculate the center point of each cluster as the mean of its samples, then repeat steps 2 and 3 until no center point changes or the maximum number of iterations is reached.

# K-means pseudocode 
import copy

import numpy as np
 # calculates Euclidean distance 
def get_distance(x, y):
    """Return the Euclidean distance between two points.

    Args:
        x: Coordinates of the first point (any numeric array-like).
        y: Coordinates of the second point, same length as ``x``.

    Returns:
        The Euclidean (L2) distance between ``x`` and ``y``.
    """
    # Convert first so plain lists/tuples are accepted as well as arrays;
    # ``list - list`` would otherwise raise TypeError.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sum(diff ** 2) ** 0.5


def calc_mean(x):
    """Return the center of a set of points by averaging each dimension.

    Args:
        x: Non-empty sequence of points, each a sequence of numbers of the
            same length.

    Returns:
        A list holding the mean of every coordinate, one entry per
        dimension.

    Raises:
        ValueError: If ``x`` contains no points.
    """
    n_points = len(x)
    if n_points == 0:
        raise ValueError("calc_mean() requires at least one point")
    # zip(*x) walks the points column by column, i.e. one dimension at a time.
    return [sum(column) / n_points for column in zip(*x)]
 
def k_means(x_train, k, max_iter):
    """Cluster samples into ``k`` groups with the standard k-means loop.

    The first ``k`` samples seed the cluster centers. Every iteration
    assigns each sample to its nearest center (Euclidean distance, ties go
    to the lowest cluster index) and then moves each center to the mean of
    the samples assigned to it. Iteration stops as soon as no center moves,
    or after ``max_iter`` iterations, whichever comes first.

    Args:
        x_train: 2-D array-like of shape (n_samples, n_features). It is
            never modified.
        k: Number of clusters; must satisfy ``1 <= k <= n_samples``.
        max_iter: Maximum number of assign/update iterations to run.

    Returns:
        A list of ``k`` cluster centers, each a list of floats.

    Raises:
        ValueError: If ``x_train`` is not 2-D or ``k`` is out of range.
    """
    samples = np.asarray(x_train, dtype=float)
    if samples.ndim != 2:
        raise ValueError("x_train must be a 2-D collection of samples")
    if not 1 <= k <= len(samples):
        raise ValueError("k must be between 1 and the number of samples")

    # Copy the seeds: slicing an ndarray yields a view, and updating the
    # centers through it would silently overwrite the training data.
    centers = samples[:k].copy()

    # A bounded loop guarantees max_iter is honoured.
    for _ in range(max_iter):
        # distances[i, j] is the distance from sample i to center j.
        distances = np.linalg.norm(
            samples[:, np.newaxis, :] - centers[np.newaxis, :, :], axis=2
        )
        nearest = distances.argmin(axis=1)  # first minimum wins on ties

        updated = centers.copy()
        for index in range(k):
            members = samples[nearest == index]
            # A cluster that attracted no samples keeps its previous center.
            if len(members):
                updated[index] = members.mean(axis=0)

        # Converged: the update step left every center where it was.
        if np.array_equal(updated, centers):
            break
        centers = updated

    return centers.tolist()

 

Guess you like

Origin www.cnblogs.com/wqbin/p/11031167.html