Spark ML K-Means聚类算法

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/gdkyxy2013/article/details/90173469

        声明:代码主要以Scala为主,希望广大读者注意。本博客以代码为主,代码中会有详细的注释。相关文章将会发布在我的个人博客专栏《Spark 2.0机器学习》,欢迎大家关注。


        在我的专栏《Python从入门到深度学习》中,已经介绍了两篇关于K-Means聚类的文章,分别是:《Python实现K-Means聚类算法》《Python实现K-Means++聚类算法》。在此,K-Means算法的介绍及原理不再赘述,有兴趣的小伙伴可以点击查看我的这两篇相关文章。

        接下来,一起来看一下,使用Scala实现K-Means算法的一个实例~

【数据形式】

数据的格式大体如下图所示:

图中可以看到,数据之间的分隔符为“|”,正常的数据都有8个字段。

【代码示例】

1、首先定义如下的样例类

/**
  * One record of the input file, with every column kept as raw text.
  * The fields are declared in the order in which they are filled from a split input line.
  */
case class Doc(
  docId1: String,
  docId2: String,
  soure: String,
  name: String,
  docName: String,
  country: String,
  typeName: String,
  text: String
)

2、读取数据,分割并过滤掉不等于8个字段的数据

// Load the raw text file; each element is one line of the file.
val rawLines = spark.read.textFile(datapath + "test.txt")
// Split every line on the literal '|' delimiter.
val splitLines = rawLines.map(line => line.split("\\|"))
// Keep only the lines that produced exactly 8 fields.
val completeLines = splitLines.filter(fields => fields.length == 8)
// Map the 8 fields, in order, onto the Doc case class.
val docDataDF = completeLines.map { fields =>
  Doc(fields(0), fields(1), fields(2), fields(3), fields(4), fields(5), fields(6), fields(7))
}

3、定义正则表达式匹配

  /**
    * Cleans a piece of text by removing percent-encoded bytes (such as %0A), digits and a
    * fixed set of punctuation characters.
    *
    * Percent escapes are replaced by a single space so that the words on either side of an
    * encoded line break stay separate. Digits and punctuation are deleted outright.
    *
    * @param s raw text; may be null
    * @return a one-element array holding the cleaned text (possibly the empty string),
    *         or an empty array when `s` is null
    */
  def replaceAndSplit(s: String): Array[String] = {
    if (s == null) {
      // Null input yields an empty result instead of a NullPointerException.
      Array.empty[String]
    } else {
      // Match exactly "%" plus two hex digits. The previous pattern "%\\w+\\w+" was greedy
      // and also swallowed every word character that followed the escape.
      // This must run before digit removal, otherwise "%0A" would be reduced to "%A".
      val withoutEscapes = s.replaceAll("%[0-9A-Fa-f]{2}", " ")
      val withoutDigits = withoutEscapes.replaceAll("\\d+", "").trim

      // Characters that are deleted wherever they occur.
      val punctuation = """().,?[]!;|%*-"""
      val cleaned = punctuation.foldLeft(withoutDigits) { (text, mark) =>
        text.replace(mark.toString, "")
      }

      // Commas are part of the punctuation set above, so the former split(",") could never
      // split anything; the result has always been a single-element array.
      Array(cleaned)
    }
  }

4、定义udf函数,实现上面的正则表达式匹配

  /**
    * UDF wrapper that applies replaceAndSplit to a string column,
    * producing an array-of-strings column.
    */
  val clearData = udf((sentence: String) => replaceAndSplit(sentence))

5、数据清洗,提取特征向量

  /**
    * Cleans the document text and converts it into TF-IDF feature vectors.
    *
    * @param docDataDF input frame; the columns "typeName" and "text" are read from it
    * @return a frame with two columns: "label" (the former typeName) and "features"
    *         (the IDF-rescaled hashed term frequencies)
    */
  def extractorByTFIDF(docDataDF: DataFrame): DataFrame = {
    // Only the category and the body text are used; rename them for the later stages.
    val labelled = docDataDF.select("typeName", "text").toDF("label", "sentence")

    // Step 1: run the cleaning UDF on the raw sentence.
    val withVocabulary = labelled.withColumn("vocabulary", clearData(col("sentence")))

    // Step 2: render the array column as a string and drop '[', ']' and apostrophes.
    val vocabularyAsText = withVocabulary.col("vocabulary").cast(StringType)
    val withWords2 = withVocabulary.withColumn(
      "words2",
      regexp_replace(vocabularyAsText, "[\\['\\]]", ""))

    // Step 3: collapse every run of whitespace into a single space.
    val words2AsText = withWords2.col("words2").cast(StringType)
    val withWords3 = withWords2.withColumn(
      "words3",
      regexp_replace(words2AsText, "\\s+", " "))

    // Split the cleaned text into tokens.
    val tokenData = new Tokenizer()
      .setInputCol("words3")
      .setOutputCol("words")
      .transform(withWords3)
      .select("label", "words")

    // Drop stop words from the token list.
    val removerData = new StopWordsRemover()
      .setInputCol("words")
      .setOutputCol("filtered")
      .transform(tokenData)
      .select("label", "filtered")

    // Hash the remaining tokens into a 100-dimensional term-frequency vector.
    val featurizedData = new HashingTF()
      .setInputCol("filtered")
      .setOutputCol("rawFeatures")
      .setNumFeatures(100)
      .transform(removerData)

    // Fit IDF on the term frequencies, then rescale them to obtain the TF-IDF features.
    val idfModel = new IDF()
      .setInputCol("rawFeatures")
      .setOutputCol("features")
      .fit(featurizedData)

    idfModel.transform(featurizedData).select("label", "features")
  }

6、使用K-Means模型进行计算

// Fit a K-Means model with 8 clusters on the "features" column.
val kmeansmodel = new KMeans()
  .setFeaturesCol("features")
  .setPredictionCol("prediction")
  .setK(8)
  .fit(data)

// Append the assigned cluster index as the "prediction" column.
val results = kmeansmodel.transform(data)
// results.show(false)

// Print the first and third column of every row, collected to the driver.
for (row <- results.collect()) {
  println(s"${row(0)}is predicted as cluster${row(2)}")
}

// Print the centre vector of each cluster.
for (center <- kmeansmodel.clusterCenters) {
  println(s"聚类中心:$center")
}

7、完整代码如下:

package sparkml

import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.ml.feature.{HashingTF, IDF, StopWordsRemover, Tokenizer}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.StringType

/**
  * K-Means聚类测试
  */

/**
  * One record of the input file, with every column kept as raw text.
  * The fields are declared in the order in which they are filled from a split input line.
  */
case class Doc(
  docId1: String,
  docId2: String,
  soure: String,
  name: String,
  docName: String,
  country: String,
  typeName: String,
  text: String
)

/**
  * K-Means clustering demo: reads pipe-delimited documents from a text file, converts
  * their text into TF-IDF vectors, clusters them and reports how the document types are
  * distributed over the clusters.
  */
object KMeansTest {

  /**
    * Program entry point. Runs the whole pipeline against "./test.txt" on a local Spark
    * session and prints the results to standard output.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("KMeansTest")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Set the Spark log4j level programmatically.
    spark.sparkContext.setLogLevel("WARN")

    try {
      val datapath = "./"
      // Split each line on '|' and keep only the lines with exactly 8 fields.
      val docDataDF = spark.read.textFile(datapath + "test.txt")
        .map(x => x.split("\\|"))
        .filter(x => x.length == 8)
        .map(doc => Doc(doc(0), doc(1), doc(2), doc(3), doc(4), doc(5), doc(6), doc(7)))
//      docDataDF.show()
//      docDataDF.cache()

      // Select document id (docId1), document name (docName), category (typeName) and body (text).
      val selectedData = docDataDF.select("docId1", "docName", "typeName", "text")
//      selectedData.show(5)

      // Vectorise the documents.
      val data: DataFrame = extractorByTFIDF(selectedData)

      val kmeansmodel = new KMeans()
        .setK(8)
        .setFeaturesCol("features")
        .setPredictionCol("prediction")
        .fit(data)
      val results = kmeansmodel.transform(data)
//      results.show(false)

      // Column 0 is the label, column 2 the predicted cluster index.
      results.collect().foreach { row =>
        println(s"${row(0)}is predicted as cluster${row(2)}")
      }
      kmeansmodel.clusterCenters.foreach { center =>
        println(s"聚类中心:$center")
      }

      // The Within Set Sum of Squared Errors (WSSSE) measures how tight the clusters are;
      // lower is better. When the true k is unknown, how this value changes with k is a
      // useful guide for choosing k.
      // NOTE(review): computeCost is deprecated since Spark 2.4 and removed in Spark 3.0;
      // on newer versions use ClusteringEvaluator or the training summary instead.
      val WSSSE = kmeansmodel.computeCost(data)
      println(s"Within Set Sum of Squared Errors = $WSSSE")

      val dataDF = results.select("label", "prediction").toDF("docType", "docSort")
//      dataDF.show()
      dataDF.createOrReplaceTempView("doc_table")

      // Share of each document type inside every cluster, largest share first.
      // The final clause used to read "group by ratio desc", which is not valid SQL.
      // The sort is done on the numeric ratio because the formatted "xx%" string would
      // sort lexicographically.
      val sqlstr = " select a.doctype, concat( round( a.typeTotal/b.total *100,2),'%') as ratio, a.docsort" +
        " from ( select doctype, count(doctype) as typeTotal, docsort from doc_table group by doctype, docsort) a" +
        " left join ( select count(doctype) as total, docsort from doc_table group by docsort) b" +
        " on a.docsort = b.docsort order by a.typeTotal/b.total desc"
      val distributionRatio = spark.sql(sqlstr)
      distributionRatio.show(10, false)
    } finally {
      // Release the Spark session even when a step above fails.
      spark.stop()
    }
  }

  /**
    * Cleans a piece of text by removing percent-encoded bytes (such as %0A), digits and a
    * fixed set of punctuation characters.
    *
    * Percent escapes are replaced by a single space so that the words on either side of an
    * encoded line break stay separate. Digits and punctuation are deleted outright.
    *
    * @param s raw text; may be null
    * @return a one-element array holding the cleaned text (possibly the empty string),
    *         or an empty array when `s` is null
    */
  def replaceAndSplit(s: String): Array[String] = {
    if (s == null) {
      // Null input yields an empty result instead of a NullPointerException.
      Array.empty[String]
    } else {
      // Match exactly "%" plus two hex digits. The previous pattern "%\\w+\\w+" was greedy
      // and also swallowed every word character that followed the escape.
      // This must run before digit removal, otherwise "%0A" would be reduced to "%A".
      val withoutEscapes = s.replaceAll("%[0-9A-Fa-f]{2}", " ")
      val withoutDigits = withoutEscapes.replaceAll("\\d+", "").trim

      // Characters that are deleted wherever they occur.
      val punctuation = """().,?[]!;|%*-"""
      val cleaned = punctuation.foldLeft(withoutDigits) { (text, mark) =>
        text.replace(mark.toString, "")
      }

      // Commas are part of the punctuation set above, so the former split(",") could never
      // split anything; the result has always been a single-element array.
      Array(cleaned)
    }
  }

  /**
    * UDF wrapper that applies [[replaceAndSplit]] to a string column.
    */
  val clearData = udf {
    (words: String) => replaceAndSplit(words)
  }

  /**
    * Cleans the document text and converts it into TF-IDF feature vectors.
    *
    * @param docDataDF input frame; the columns "typeName" and "text" are read from it
    * @return a frame with two columns: "label" (the former typeName) and "features"
    *         (the IDF-rescaled hashed term frequencies)
    */
  def extractorByTFIDF(docDataDF: DataFrame): DataFrame = {
    val sourceData = docDataDF.select("typeName", "text").toDF("label", "sentence")

    // Data cleaning.
    val doData_1 = sourceData.withColumn("vocabulary", clearData(col("sentence")))

    // Render the array as a string and strip the "[" / "]" brackets (and apostrophes).
    val doData_2 = doData_1.withColumn("words2", regexp_replace(doData_1.col("vocabulary").cast(StringType), "[\\['\\]]", ""))
//    doData_2.select("label", "words2").show(5)

    // Collapse runs of whitespace into a single space.
    val doData_3 = doData_2.withColumn("words3", regexp_replace(doData_2.col("words2").cast(StringType), "\\s+", " "))
//    doData_3.select("label", "words3").show(5)

    val tokenizer = new Tokenizer()
      .setInputCol("words3")
      .setOutputCol("words")
    val tokenized = tokenizer.transform(doData_3)
    val tokenData = tokenized.select("label", "words")

    // Remove stop words.
    val remover = new StopWordsRemover()
      .setInputCol("words")
      .setOutputCol("filtered")
    val removerData = remover.transform(tokenData).select("label", "filtered")
//    removerData.show(5, false)

    // Hash the tokens into a 100-dimensional term-frequency vector.
    val hashingTF = new HashingTF()
      .setInputCol("filtered")
      .setOutputCol("rawFeatures")
      .setNumFeatures(100)
    val featurizedData = hashingTF.transform(removerData)

    val idf = new IDF()
      .setInputCol("rawFeatures")
      .setOutputCol("features")
    val idfModel = idf.fit(featurizedData)

    // Rescale the term frequencies by IDF to obtain the TF-IDF features.
    idfModel.transform(featurizedData).select("label", "features")
  }
}

程序运行结果如下:(因运行结果很多,这里只截取一部分结果)

Travel & Localis predicted as cluster2
Entertainmentis predicted as cluster7
Health & Fitnessis predicted as cluster7
Casualis predicted as cluster1
News & Magazinesis predicted as cluster1
Financeis predicted as cluster7
Libraries & Demois predicted as cluster0
Familyis predicted as cluster4
Media & Videois predicted as cluster1
News & Magazinesis predicted as cluster1
Businessis predicted as cluster1
Businessis predicted as cluster5
聚类中心:[0.9236032084506792,0.8284792431437666,0.7539312657453081,4.996626898033553,1.4495074538903303,0.8272722478638311,2.5614625099496355,2.337176093691245,0.7518750866288443,1.0467598190229594,0.7096854450851546,0.891150653870225,1.0956137690166972,1.2459433704084344,0.9034247433529918,0.8513513227695271,1.2520049107850049,1.14162279075498,1.928802361969307,1.0097444584659208,1.7309446207047765,1.0580077458248878,1.1499830608351467,0.933619876377747,1.6307734967683485,0.6083621015003998,1.0099863098613795,0.7477483495664728,1.2932130224844967,1.1030185844927236,0.6817343285512232,0.9922456004254353,1.1544606242809978,0.7604353584469028,0.9636219641268766,1.2272784842018238,0.9088400554326204,0.7085003758166575,1.0026934149732338,1.3702309828553951,0.9152828400553478,1.0184394979515818,0.6722967560137725,1.1161113189732355,0.7178264705485822,1.7196685892316244,1.1249367325734265,0.8479819431339212,1.187403341946672,0.6952589786329323,0.8069736108957462,0.7185549352890377,0.867267503224848,1.084255649342063,1.0340815976757587,0.8739926492406918,1.2839676297795128,1.377182302649207,0.8768118713866994,0.7915800684571225,1.352573362649574,1.2762965303130802,1.4296637040505331,0.6934480149551032,1.1871620540968337,1.0020486376596944,2.1783396939136637,0.9194814225041318,0.8539230863732928,1.2313944185682917,1.206784944340394,1.0921859497164579,1.4753445712895397,0.8981192770689264,0.5938465670347434,0.6783381931594962,1.030053084337561,0.8912012616546561,1.8776261211296028,1.0256757139046913,0.7949081484658429,11.005454894448038,1.1482968761677093,1.114266675122521,0.745959862342443,3.259291245874953,1.539236115370727,1.0658884099430677,1.181124874308747,0.6562099135805654,2.007825638728403,0.8216727608169174,1.070652070243264,1.4009316662865294,0.6326922950697935,0.8112266000346876,0.8252056001504723,1.0573241055394358,1.087639020847353,1.2411327691604213]
聚类中心:[0.2577915481907487,0.24187063799989547,0.24630985556481852,0.23957688913191325,0.25285488962258446,0.2335725992578756,0.22857939302669636,0.21827079940576968,0.24775928629066993,0.2218443249589329,0.331296594085653,0.29886418800641923,0.24276468516713678,0.26865854107410314,0.2101044981105112,0.2513680581594059,0.25173978753235926,0.2469653461196166,0.24972511616911328,0.25477414192273573,0.2729880069645572,0.25182046318638746,0.21951510211807507,0.22939417205092033,0.29968478515707364,0.2815389964005005,0.24557950724075683,0.2743620683619651,0.23752300497192522,0.22236597665714675,0.2694379372537791,0.227674354229177,0.2197397425131297,0.31808661922172904,0.25357582541296125,0.2455096790956047,0.24253560874692862,0.2628812992759077,0.20156411506800972,0.2756893375423326,0.24941635085430117,0.2732709296465288,0.26328450191286484,0.2327181637851255,0.2672763184856487,0.21823629739753156,0.2603888680768425,0.30482956606008305,0.2539800163703044,0.2494123872455821,0.2957145301691452,0.2574002272824942,0.2530279204146034,0.21638107292181635,0.22385306541179525,0.2656843824145815,0.24222258390631463,0.2737480467338501,0.2678335455867196,0.28650323216264373,0.27442498334750215,0.21603314170711163,0.22589211010635152,0.2721500040540577,0.25770521229453325,0.2502967844130666,0.24556852168319027,0.2863601023309159,0.289246549220357,0.3340719532653631,0.24272817673815253,0.2531820703185149,0.28207890657062273,0.3058354180517309,0.26993999677037445,0.37189590183442417,0.1835991775155036,0.2563337681534668,0.27472731107630777,0.2205331378439355,0.2916928995551411,0.2636916685440801,0.26998112980551814,0.25829062048720397,0.2075903046789032,0.24677293796360347,0.28416288416682123,0.22893344279592798,0.26421397688029524,0.2783961095933836,0.2010470028037374,0.25586457830095627,0.27300919895225984,0.3237450081760006,0.3524002657989988,0.31026933760091213,0.24554133063267086,0.280593472308155,0.26254276491682116,0.274124091414408]
聚类中心:[2.1936935571900107,1.9464118325346789,1.8127455375958357,12.856021834274635,3.196242979606894,1.8327203709576319,6.60923466607349,6.145588868343169,1.6304765773280192,2.4037561281537694,1.6218721582172888,1.9176090338660297,2.5563242485825657,2.5330776636145123,2.1761368851337455,2.0351703600725837,3.418287988622966,2.4882203291565053,4.7090289070762985,2.615375092955452,4.348355717801365,2.183213865012847,2.7460140997128923,2.1084925958613785,4.2133519700113675,1.4728077412253382,2.6930489913561377,1.586813422515331,3.064184237663189,2.567423520789301,1.3415123424671964,2.3337570299573485,2.9019216419970078,1.6024313945308815,2.2703382973715356,2.88243387058489,2.185906863994649,1.5211946226754227,2.165641985123635,3.296914774639655,2.3657616129738783,2.143822965872875,1.4970035109771866,2.7055707677328655,1.6132575897980483,3.9542260865140504,2.7890697045577935,1.6756054387521568,2.5999848962652448,1.5844996407451262,1.740164757878865,1.6274602177865818,2.15671407849582,2.2957344245267453,2.537250629465546,1.952085253028444,2.9341523451834983,3.3050071687865468,2.1570462592147925,1.7539532021426152,3.07171182779381,3.0403976091592977,3.705196437101558,1.4269930582887191,3.040126272667546,2.614644835562038,5.044014599662779,1.9463550956731372,1.7605885374307455,2.6809230067707532,3.017104955402999,2.4743113880697867,3.49565699569951,2.4251256677425994,1.4260543271362356,1.4191583090181965,2.4416074412501936,2.1887383779527236,4.137468621401845,2.8053247855932137,1.843002311878194,28.662906878946327,2.6634711053559568,2.2384824820312637,1.8492445533493866,9.72949090863246,4.159371998056375,2.316569863503567,3.003194418307522,1.3965769485727748,5.108385482353493,1.936578315850713,2.4890502080925234,3.749979479087212,1.3490516960487906,1.7064657811129953,1.7641649628908946,2.6384241548527574,3.115277541396776,3.1577298360812738]
聚类中心:[2.3861395213491665,1.3412103472088037,1.271067179940959,1.040451632258466,1.2038289764179546,2.9871551025952248,1.3798793938593181,0.9405472757770614,1.4650396777190282,1.5842678769966254,1.7646610371911724,2.2954157740341232,1.9662581932890422,1.622120545455118,1.3110318901980575,2.539136185868194,3.0090601596578277,1.3222024639851395,1.1875075004965305,6.467069852628031,1.3282730022891986,2.4336474001424375,1.5892529112307514,1.3372869452106644,2.4211682368136525,1.9221247678975983,1.72385375342219,1.8083476933944587,2.2850237810808913,1.1984349933631049,1.8655830227806902,1.7757890385470616,1.510689525080976,2.7438432313586456,1.3101363114149653,1.0172910475182326,2.4134114027501328,1.8144245028629085,1.2083783669112993,1.754845036560174,1.425998442065561,5.677199808152487,1.7244767215236836,1.4542896330675512,2.597620473034019,1.231135352324462,6.862600861164776,1.7801337878184127,1.1457916647001445,1.4253292527040493,1.8149153870040406,1.7045692009853057,2.187465809589689,1.3293011009479883,1.6226496386554567,2.7758916811124537,1.4779575032735128,1.7623821077140105,2.481283931055477,1.598505223018586,1.3397779640487666,1.310651739120562,1.2205922644569482,1.9964230937260912,2.0339057962213,1.6350312044622206,2.3745153683184324,1.9873426470356435,1.7002521567144415,2.050799777392442,1.5376843532200786,1.3246894466260573,1.6767143763562509,1.6562654049860397,1.416718321330371,2.635026980968954,1.0650918281996162,1.4374719495140302,1.4775556703776354,2.2700367396731487,2.412670057982965,1.236504220871974,1.8025773423900358,1.7512391376553766,1.9690563782562172,1.730727553076226,1.3651785876338747,1.7517448611843613,1.9047976972423453,1.9982374585128706,1.0312231305291015,1.336034164304123,1.9631068413640989,3.7954682072788364,1.5583639926827066,1.6483237916579452,1.8342663892711744,1.8873571119262957,1.6386050094361913,1.416429975705314]
聚类中心:[1.4812952058173896,1.4277516047128993,1.554963453597877,1.3421903299824345,1.537997778737668,1.4722421495149276,1.3854931245875752,1.2621590895969816,1.7209902013216773,1.4369347087798798,1.7542651813949768,1.625558153551786,1.497478312708841,1.7264506749101225,1.4981077658391317,1.4974232592543393,1.4795476261850575,1.4089923078431756,1.4126228437226909,1.4313017506586323,1.5568649598990452,1.4225026292734206,1.431888376688066,1.384874112681258,1.6643315873863858,1.7392148769016222,1.5696634460964263,1.6834892509814572,1.5622780841604418,1.2203579466393508,1.6835147030378257,1.4301732698629608,1.4617515161446477,1.558264950382333,1.6029731113761376,1.457810128914284,1.549445194516931,1.6133492001423673,1.2418965816431602,1.3858518235127424,1.5365239406132776,1.597964821820288,1.543140521096137,1.3622103940240853,1.6159593824424308,1.3704842977384164,1.4221169858941591,1.5673530397596904,1.5336425281556763,1.5703290160246322,1.4896540335252346,1.5531490347989956,1.5448776243952236,1.3574995121223647,1.4445372379601715,1.677829317362297,1.437933939953297,1.6834806574854868,1.5546207678350203,1.5636046681178228,1.532685658607185,1.3761388761135305,1.390245704974441,1.621011839117762,1.5302492057044335,1.4801335931176398,1.5406174781768354,1.6379628854434145,1.613550062870071,1.4379828994674804,1.4760338180580768,1.4714265809407052,1.4830134203179912,1.816063492064299,1.5525190959296828,2.145321746415765,1.1999103630803754,1.6204667644523496,1.5142384373928377,1.234715286559348,1.4071426538403553,1.1213863178174042,1.5560028962643389,1.560521889150307,1.4291550236927593,1.2258646335356858,1.5832350712669876,1.2342274267712954,1.4291777439165396,1.7442799100651996,1.2696361339379454,1.6205046412037696,1.6201718281763022,1.7090871685444673,1.7037246122353222,1.8611293842029346,1.684470612879191,1.5093489964221964,1.3884899420575736,1.4368610610592165]
聚类中心:[2.6374773293682394,2.6096401953979074,3.0228608226509506,2.689300994718686,2.9301726561609236,2.73584392208443,2.4653101246942537,2.3030042136500186,2.8058152244059706,2.46674381051459,3.070674957609549,2.924783864497053,2.729053739998293,3.150412426287807,2.8301183468226796,2.7662511479541085,2.658157688147762,2.4927755862326175,2.6805706447609565,2.4817720783842443,2.9042023026739225,2.332779904762233,2.4654650669762916,2.444003793424573,2.8881003549780506,3.1815442520085666,2.998415306138045,2.822668534270005,2.7576732921694953,2.15775005771494,2.867425419423499,2.6777708945302097,2.4893339282068188,2.7023668175716367,2.9016430682630165,2.873690600587043,2.8570289212620956,2.583547275490781,2.2435897411167525,2.5831326579636715,2.8796787916054742,2.8219590476298877,2.683105832868891,2.433704486162273,3.006028059268834,2.604839453209477,2.704770805206836,2.7148041394671485,2.751459027456242,2.804785780469584,2.7433859327990864,2.9448941749785127,2.735328797802879,2.467041235376296,2.6273437268414193,2.770145468813472,2.7680949702017696,3.053266053827228,3.0918428076917923,2.744769875816202,2.6409511393227034,2.446779831207196,2.5064614922381696,2.861943605534085,2.6891776804139322,2.691529794811247,2.795489082417782,2.8598850676805707,2.960779340148976,2.4788664210092883,2.6595689338156534,2.6049683439896834,2.745089570602758,3.1129151856729633,2.7656172983037903,3.1148746196759904,2.312927755191888,2.8699797988644176,2.7831165711283874,2.3011122629037053,2.4412924994601606,2.3457911156474567,2.7555003716869257,2.831331926106164,2.6200755197477914,2.382718989779807,2.7709853336999597,2.1339792923199608,2.6165708747861145,3.0591377796947437,2.443753369659591,2.7571944780506934,3.011032861396932,2.851358857993664,2.7718327328899717,3.2990901625425537,3.1145309223462743,2.692310060553643,2.5496292701918546,2.7742795152591304]
聚类中心:[1.8726457219719441,2.4919091268685984,7.245355567975445,2.8918162663659097,1.0922717714898134,1.904253060510106,3.999077051997617,2.3858963107627074,1.3158333090491308,2.450223478865057,1.6804995283857127,2.0228477916655305,1.4066108564499438,1.372125641891049,1.7424534077219018,2.222282419316242,1.2062132119816404,10.351238816000935,2.8246909071023936,1.4591291917465454,2.6232852298670393,1.718433617782702,4.815917583032972,4.520077716361352,1.1297732839042023,2.1801962488070386,5.7562499091772015,2.6599656159852145,1.6344851310965534,13.242378046872805,1.3442041623279002,2.1060471677174544,2.493471059514586,2.904650292233565,1.5115011741100461,1.5519149685947788,1.113698767624646,1.7467826555451529,1.7861782320618929,2.4039236340243937,1.979997319247627,1.8413142909244526,1.243828771884123,2.320413625195787,1.8092033432006174,2.2313025057261853,2.884154656419862,1.9519563454742277,3.606428463869162,2.555768773670088,1.3104973237366702,1.7269504370278324,1.3037543661783362,1.6443280084744196,3.4946131850875166,2.342483962972605,5.196908603230425,1.2003409818920039,1.3611022098239742,1.3394596727652504,4.05937347346273,4.779194583330867,2.930575097933631,1.0952760927568506,1.5125566116783067,2.1178774400921303,1.769750977534709,1.1959193238520636,1.5983043542800526,2.043087439649464,2.543853425681048,2.146941946126256,1.5029918171359438,1.197403683156586,1.5592640491562182,1.7819314442940841,6.695355422729497,2.848038503418138,1.9760723865493721,1.556156478094664,1.931656609674562,1.596601250776561,3.333287900870891,2.175810630353628,1.8729220901546217,1.827493179340965,2.807915440252971,3.9682302044609816,1.2832474795361564,1.6412914086163781,1.863561104400018,2.809430444345099,1.5651161075172013,1.8631155734844798,1.825038503408034,1.8661776677630546,1.7470439415935353,1.2579244563655692,1.852519327969852,1.210409128689551]
聚类中心:[0.7665001773500252,0.721004157204287,0.8874554220677424,0.6908624542125192,0.7751502536299126,0.7302476017963732,0.7000843334801672,0.6396530846230754,0.8278049552129543,0.6956983858158138,0.9513926689574894,0.8147999482196601,0.7747349611210713,0.8500616873692866,0.7020629935404633,0.7624793187951984,0.780328741802512,0.7834486104968974,0.7369297385088172,0.803361612823239,0.8282374127375899,0.7767603636202586,0.7163982085271502,0.7501257108255572,0.8848410430527354,0.9025666874671249,0.7932206122828078,0.8402089116439209,0.7862015602612752,0.6876789276604829,0.8618457966061297,0.7309044349930899,0.7015950988577127,0.8973939615047957,0.776458501034684,0.7734015920493346,0.7984236045791928,0.8953600147307399,0.6344092278431509,0.7460048996193738,0.7581556304721846,0.9068708261965562,0.8280732254833391,0.6888661005781597,0.848324004608722,0.6932555963845511,0.807021691727888,0.8435963481909607,0.733394974825325,0.7858025506057481,0.7852735045563628,0.7734325896623624,0.7712990736460238,0.6834156631956863,0.7506337767495084,0.8587270197296489,0.7405145869068186,0.8570886750475373,0.7938437147319487,0.8736807348639947,0.8161884309283033,0.6866336428151895,0.7268157472335992,0.8239430588721876,0.7847054250612326,0.7593571084402183,0.8035319502785011,0.8246381288729089,0.8743667192138512,0.8425586095509299,0.7414306967570362,0.7732702721011417,0.7243298489077397,0.9826173879432454,0.807028510667044,1.1492676643513406,0.6092570675083697,0.8057575218457023,0.7998535785938987,0.6504510647503513,0.7730353477008336,0.6456416470942364,0.8361068168801697,0.7916758717774558,0.6854086054163615,0.6690502225808046,0.8316875114588818,0.6951225774752177,0.7546567145085766,0.9107724738048506,0.5979632946911535,0.7831446471501249,0.8361374595971662,0.9508760256048628,0.9494695987916554,0.9498861164101694,0.8122212181024768,0.814690019257693,0.7324664151706161,0.7481931141534023]
Within Set Sum of Squared Errors = 5.830304865438332E7

你们在此过程中遇到了什么问题,欢迎留言,让我看看你们都遇到了哪些问题。

猜你喜欢

转载自blog.csdn.net/gdkyxy2013/article/details/90173469