'''
核心点:eps内含有超过min数目的点
边界点:eps那点的数量小于min,但是在核心点的邻居
噪音点:任何不是核心点或者边界点的点
'''
import numpy as np
import itertools
def CreateDataSet():
dataset = np.array([[15.55,28.65],
[14.9,27.55],
[14.45,28.35],
[14.15,28.8],
[13.75,28.05],
[13.35,28.45],
[13,29.15],
[13.45,27.5],
[13.6,26.5],
[12.8,27.35],
[12.4,27.85],
[12.3,28.4],
[12.2,28.65],
[13.4,25.1],
[12.95,25.95]])
return dataset
def dist(x,y):
return sum((x-y)*(x-y))
data=CreateDataSet()
nrow=data.shape[0]
eps=1
mincount=2
core={}
core_dist=np.zeros((nrow,nrow))
for i in range(nrow):
l=[]
for j in range(nrow):
tmp=dist(data[i,:],data[j,:])
core_dist[i,j]=tmp
if i!=j and tmp<eps:
l.append(j)
core[i]=l
#### outilers - 异常点
outilers=[key for key in core.keys() if len(core[key])<mincount]
#print(outilers)
#### cores - 核心节点
cores=[key for key in core.keys() if len(core[key])>mincount]
#print(cores)
#### borders - 边界点
borders_tmp= list(itertools.chain.from_iterable([core[key] for key in cores]))
borders= list(set([ele for ele in borders_tmp if ele not in cores and ele not in outilers]))
#print(borders)
unvisitor=list(set(borders+cores))
c_all=[]
c_all.append(list(unvisitor))
for key in unvisitor:
zj=unvisitor
unvisitor.remove(key)
for ele in unvisitor:
if core_dist[key,ele]<eps:
unvisitor.remove(ele)
temp=core[key]+core[ele]
for tmp in temp:
if tmp in unvisitor:
unvisitor.remove(tmp)
a=list(unvisitor)
c_all.append(a)
class_res={}
for i in range(len(c_all)-1):
temp="第"+str(i)+"类"
class_res[temp]=[ ele for ele in c_all[i] if ele not in c_all[i+1] ]
print(class_res)
print(outilers)
输出:
{'第0类': [2, 3, 4, 5], '第1类': [7, 9, 10], '第2类': [6, 11, 12]}
[0, 1, 8, 13]
dbscan简单实现
猜你喜欢
转载自blog.csdn.net/huangqihao723/article/details/79260336
今日推荐
周排行