Python K-means clustering

Principle: The k-means clustering algorithm is an iterative clustering method. First, K objects are selected at random as the initial cluster centers. The distance between each object and every cluster center is then calculated, and each object is assigned to the cluster center nearest to it. A cluster center together with the objects assigned to it represents one cluster. Each time samples are assigned, the center of every cluster is recalculated from the objects currently in that cluster. This process is repeated until a termination condition is met. The termination condition may be that no (or only a minimal number of) objects are reassigned to different clusters, that no (or only a minimal number of) cluster centers change, or that the sum of squared errors reaches a local minimum.

Second, the program code

# -*- coding:utf-8 -*-
import random
import math
def formula_diatance(a=None, b=None):
    """Return the Euclidean distance between two 2-D points.

    Only the first two coordinates (index 0 and 1) of ``a`` and ``b`` are
    used; any further coordinates are ignored.
    """
    delta_x = a[0] - b[0]
    delta_y = a[1] - b[1]
    return math.sqrt(delta_x ** 2 + delta_y ** 2)
def GetNewPoint(list=None):
    """Return the centroid ``[mean_x, mean_y]`` of a collection of 2-D points.

    Only coordinates 0 and 1 of each point are read. An empty collection
    raises ``ZeroDivisionError`` because the sums are divided by its length.
    """
    total_x = total_y = 0
    for point in list:
        # Plain left-to-right addition keeps the floating-point result exact
        # with respect to the accumulation order.
        total_x, total_y = total_x + point[0], total_y + point[1]
    count = len(list)
    return [total_x / count, total_y / count]
def row2column(list=None):
    """Transpose a rectangular table given as a list of rows.

    The width is taken from the first row, so the result has
    ``len(list[0])`` rows of ``len(list)`` entries each. An empty table
    raises ``IndexError`` when the first row is looked up.
    """
    row_count = len(list)
    width = len(list[0])
    return [
        [list[row][column] for row in range(row_count)]
        for column in range(width)
    ]
def which_kind(list=None, data=None):
    """Group every data item under the cluster centre it is closest to.

    Args:
        list: Distance table with one row per data item; ``list[n][m]`` is
            the distance from item ``n`` to cluster centre ``m``. The
            parameter name shadows the builtin but is kept so that existing
            callers continue to work.
        data: The items themselves, in the same order as the rows of
            ``list``.

    Returns:
        A list with one entry per cluster centre (column of the table); each
        entry is the list of items assigned to that centre, and is empty when
        no item is closest to it. An empty or missing table yields ``[]``.
    """
    if not list:
        return []

    clusters = [[] for _ in range(len(list[0]))]
    for n, distances in enumerate(list):
        # index() returns the first position holding the minimum, so an item
        # that is equally close to several centres goes to the lowest-numbered
        # one.
        nearest = distances.index(min(distances))
        clusters[nearest].append(data[n])
    return clusters
def k_means(x=None, k=0, k_list=None, class_list=None, round=0):
    """Cluster 2-D points with k-means and return the final clusters.

    Each round assigns every point to its nearest centre, prints the
    assignment, and then moves every centre to the mean of its cluster.
    Iteration stops when every cluster of the current round also appears in
    the previous round's assignment.

    Args:
        x: Sequence of points; coordinates 0 and 1 of each point are used.
        k: Number of clusters. Only used when ``k_list`` is None, in which
            case it must satisfy ``1 <= k <= len(x)``.
        k_list: Optional initial cluster centres (points, not indices). When
            None, ``k`` distinct points of ``x`` are drawn at random.
        class_list: Optional assignment from an earlier round to compare the
            first round against. Ignored when ``k_list`` is None.
        round: Number of rounds already performed; only affects the round
            numbers that are printed.

    Returns:
        A list with one cluster per centre; each cluster is a list of points
        taken from ``x``.

    Raises:
        ValueError: if ``x`` is empty, if ``k_list`` is an empty sequence, or
            if ``k_list`` is None and ``k`` is not between 1 and ``len(x)``.
    """
    if x is None or len(x) == 0:
        raise ValueError('x must contain at least one point')

    if k_list is None:
        # Without this check the rejection loop below never terminates when
        # more distinct indices are requested than exist.
        if not 1 <= k <= len(x):
            raise ValueError('k must be between 1 and len(x)')
        chosen = []
        while len(chosen) != k:
            r = random.randint(0, len(x) - 1)
            if r not in chosen:
                chosen.append(r)
        centers = [x[index] for index in chosen]
        # Join all chosen points so the message is valid for any k, not only
        # k == 2.
        print('初始随机点:' + '  ,  '.join(str(center) for center in centers))
        previous = None
    else:
        if len(k_list) == 0:
            raise ValueError('k_list must contain at least one centre')
        centers = k_list
        previous = class_list

    # Iterate instead of recursing once per round, so the number of rounds is
    # not limited by the interpreter's recursion depth.
    while True:
        distance_rows = [
            [formula_diatance(center, point) for point in x]
            for center in centers
        ]
        clusters = which_kind(row2column(distance_rows), x)
        round += 1
        print('第' + str(round) + '轮:' + str(clusters))

        # Converged when every current cluster already existed last round.
        if previous is not None and all(c in previous for c in clusters):
            return clusters

        # A centre that attracted no point keeps its position; averaging an
        # empty cluster would divide by zero.
        centers = [
            GetNewPoint(members) if members else old_center
            for members, old_center in zip(clusters, centers)
        ]
        print('新均值点:' + str(centers))
        previous = clusters
if __name__ == '__main__':
    # Demo run: split seven sample points into two clusters and print the
    # input followed by the final clusters.
    points = [[1, 1], [4, 5], [3, 5], [4, 4], [2, 2], [3, 4], [0, 0]]
    print('输入数据:', points, sep='')
    result = k_means(x=points, k=2)
    print('最后结果:', result, sep='')

 Author: WangB

 

Published 146 original articles · won praise 213 · views 10000 +

Guess you like

Origin blog.csdn.net/s0302017/article/details/103763782