1. Initialize k cluster centers.
2. Assign every sample point to a cluster: each point belongs to the cluster whose center is nearest to it.
3. Recompute the center of each cluster; repeat until the centers stop changing or the maximum number of iterations is reached.
# K-means clustering.
#
# Algorithm:
#   1. Initialize k cluster centers (here: the first k training samples).
#   2. Assign every sample to the cluster whose center is nearest.
#   3. Recompute each cluster's center; repeat until the centers stop
#      changing or the maximum number of iterations is reached.
import copy

import numpy as np


def get_distance(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``.

    Accepts any equal-length numeric sequences (lists, tuples, arrays);
    the original ``x - y`` failed for plain Python lists.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    return float(np.sum((x - y) ** 2) ** 0.5)


def calc_mean(points):
    """Return the component-wise mean of a non-empty list of points."""
    dims = len(points[0])
    return [sum(p[i] for p in points) / len(points) for i in range(dims)]


def k_means(x_train, k, max_iter):
    """Cluster ``x_train`` into ``k`` groups; return the final centers.

    Args:
        x_train: sequence of equal-length numeric points (at least k of them).
        k: number of clusters.
        max_iter: upper bound on assignment/update iterations.

    Returns:
        List of k cluster centers (each a list of floats).
    """
    # Initial centers: the first k samples, copied so x_train stays untouched.
    cluster_center = [list(p) for p in x_train[:k]]
    # Centers from the previous pass, used for the convergence check.
    pre_cluster_center = copy.deepcopy(cluster_center)  # BUG FIX: was copy.deep_copy

    num_iter = 0
    while num_iter < max_iter:
        num_iter += 1  # BUG FIX: the original never incremented the counter

        # Map cluster index -> list of points assigned to that cluster.
        clusters_data = {}
        for point in x_train:
            # Distance from this point to every current center.
            dists = [(idx, get_distance(point, center))
                     for idx, center in enumerate(cluster_center)]
            dists.sort(key=lambda t: t[1])  # ascending by distance
            min_index = dists[0][0]  # nearest center wins
            clusters_data.setdefault(min_index, []).append(point)

        # Move each center to the mean of its assigned points.
        # (A center with no assigned points simply keeps its position.)
        for idx in clusters_data:
            cluster_center[idx] = calc_mean(clusters_data[idx])

        if pre_cluster_center == cluster_center:
            break  # converged: no center moved this pass
        pre_cluster_center = copy.deepcopy(cluster_center)

    return cluster_center