dbscan简单实现

'''
核心点：eps内含有超过min数目的点
边界点：eps内点的数量不超过min，但是核心点的邻居
噪音点：任何不是核心点或者边界点的点
'''

import  numpy as np
import itertools

def CreateDataSet():
    """Build the fixed two-column sample data set used by the demo.

    Returns:
        A freshly allocated numpy array with 15 rows and 2 columns; each
        row is one sample point.
    """
    points = (
        (15.55, 28.65),
        (14.9, 27.55),
        (14.45, 28.35),
        (14.15, 28.8),
        (13.75, 28.05),
        (13.35, 28.45),
        (13, 29.15),
        (13.45, 27.5),
        (13.6, 26.5),
        (12.8, 27.35),
        (12.4, 27.85),
        (12.3, 28.4),
        (12.2, 28.65),
        (13.4, 25.1),
        (12.95, 25.95),
    )
    return np.array(points)


def dist(x, y):
    """Return the Euclidean distance between two points.

    The true distance (not its square) is returned so that the result can
    be compared directly against a radius such as ``eps``; a squared
    distance only agrees with such a comparison when the radius is 1.

    Args:
        x: Coordinates of the first point (numpy array or plain sequence
            of numbers).
        y: Coordinates of the second point, same length as ``x``.

    Returns:
        The Euclidean distance between ``x`` and ``y`` as a float.
    """
    # asarray lets callers pass lists/tuples as well as numpy arrays.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return float(np.sqrt(np.sum(diff * diff)))


# --- DBSCAN over the sample data ----------------------------------------------
#
# Point roles (matching the definitions at the top of this file):
#   core    - more than `mincount` other points lie within `eps`
#   border  - not a core point, but a neighbour of at least one core point
#   noise   - neither core nor border
#
# Clusters are grown from core points only: every point reachable through a
# chain of neighbouring core points ends up in the same cluster, and border
# points are attached to the first cluster that reaches them.

data = CreateDataSet()
nrow = data.shape[0]

eps = 1        # neighbourhood threshold, compared against the value of dist()
mincount = 2   # a core point needs MORE than this many neighbours

# core[i]   -> indices of the other points whose dist() to point i is below eps
# core_dist -> full pairwise dist() matrix
core = {}
core_dist = np.zeros((nrow, nrow))
for i in range(nrow):
    for j in range(nrow):
        core_dist[i, j] = dist(data[i, :], data[j, :])
    # A point is never counted as its own neighbour.
    core[i] = [j for j in range(nrow) if i != j and core_dist[i, j] < eps]

#### cores - core points
cores = [key for key in core if len(core[key]) > mincount]
core_set = set(cores)

#### borders - border points (non-core points adjacent to a core point)
borders = [
    key for key in core
    if key not in core_set and any(nb in core_set for nb in core[key])
]
border_set = set(borders)

#### outilers - noise points (everything that is neither core nor border)
outilers = [
    key for key in core
    if key not in core_set and key not in border_set
]

# Grow the clusters. `cluster_of` doubles as the "visited" marker, so no
# collection is ever modified while it is being iterated.
cluster_of = {}
c_all = []  # c_all[k] is the sorted list of point indices in cluster k
for seed in cores:
    if seed in cluster_of:
        continue  # already absorbed into an earlier cluster
    cluster_id = len(c_all)
    cluster_of[seed] = cluster_id
    members = [seed]
    pending = [seed]  # core points whose neighbourhoods still need expanding
    while pending:
        point = pending.pop()
        for nb in core[point]:
            if nb in cluster_of:
                continue
            cluster_of[nb] = cluster_id
            members.append(nb)
            # Only core points propagate the cluster; border points are
            # members but do not pull in their own neighbours.
            if nb in core_set:
                pending.append(nb)
    c_all.append(sorted(members))

class_res = {}
for i, members in enumerate(c_all):
    class_res["第" + str(i) + "类"] = members

print(class_res)
print(outilers)

# Output for the sample data with eps=1, mincount=2 (hand-traced; re-run to
# confirm after changing the data or the parameters):
# {'第0类': [1, 2, 3, 4, 5, 6, 7, 9], '第1类': [10, 11, 12]}
# [0, 8, 13, 14]
猜你喜欢