# 聚类分析初探


library("factoextra")
library("NbClust")
# Load the built-in USArrests data set and standardise every column
# (centre on the column mean, divide by the column standard deviation) so
# that all variables contribute on the same scale to distance computations.
data(USArrests)
data <- scale(USArrests, center = TRUE, scale = TRUE)

# Quick look at the first five standardised rows.
head(data, n = 5L)

# ---- K-means clustering ----

# Elbow plot: within-cluster sum of squares ("wss") across candidate cluster
# counts, with a dashed vertical guide at k = 4.
# The seed must be set BEFORE this call: kmeans is passed in as the clustering
# function and starts from random centres, so an unseeded run can produce a
# different curve each time.
set.seed(1234)
fviz_nbclust(data, kmeans, method = "wss") +
  geom_vline(xintercept = 4, linetype = 2)

# Re-seed so the final fit does not depend on how many random numbers the
# elbow plot consumed above.
set.seed(1234)

# Fit k-means with 4 clusters; nstart = 25 tries 25 random sets of initial
# centres instead of a single one.
km_fit <- kmeans(data, centers = 4, nstart = 25)
print(km_fit)

# Visualise the clusters: points only, convex hull around each cluster, and
# segments from each point to its cluster centre (star.plot).
fviz_cluster(
  km_fit,
  data = data,
  palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),
  geom = "point",
  ellipse.type = "convex",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme_grey()
)

# ---- Hierarchical clustering ----

# Pairwise Euclidean distances between all observations.
result <- dist(x = data, method = "euclidean")

# Build the cluster tree from the distance matrix using Ward's method
# ("ward.D2").
result_hc <- hclust(result, method = "ward.D2")

# First look: plain dendrogram with smaller labels.
fviz_dend(result_hc, cex = 0.6)

# Same tree cut into 4 groups, one colour per group.
fviz_dend(
  result_hc,
  k = 4,
  cex = 0.5,
  k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07")
)

# 猜你喜欢

# 转载自 blog.csdn.net/rojyang/article/details/85098718