Spark MLlib(六)用 Spark 实现贝叶斯分类器

贝叶斯分类器是各种分类器中分类错误概率最小、或者在预先给定代价的情况下平均风险最小的分类器。下面是 Spark 官网( http://spark.apache.org/docs/latest/mllib-naive-bayes.html )给出的朴素贝叶斯(Naive Bayes)示例:

package alg
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.util.MLUtils
/**
 * Example driver: trains a multinomial naive Bayes classifier with Spark MLlib,
 * reports its accuracy on a held-out split, then saves and reloads the model.
 */
object naiveBayes {

  /**
   * Entry point.
   *
   * Loads a LibSVM-formatted data set, splits it roughly 60/40 into training
   * and test sets, trains the model, prints the test accuracy and finally
   * round-trips the model through `save` / `load`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)

    // Always release the context, even when loading, training or saving throws.
    try {
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

      // No seed is passed, so the split differs from run to run.
      val Array(training, test) = data.randomSplit(Array(0.6, 0.4))

      val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")

      // Pair every prediction with the true label of the same test point.
      val predictAndLabel = test.map(p => (model.predict(p.features), p.label))

      // Fraction of test points whose predicted label equals the true label.
      val accuracy = 1.0 * predictAndLabel.filter(x => x._1 == x._2).count() / test.count()

      println("accuracy:" + accuracy)

      // Save and load model.
      // NOTE(review): save presumably fails when the target directory already
      // exists from a previous run - confirm, and clean it up between runs.
      model.save(sc, "target/tmp/myNaiveBayesModel")
      val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
    } finally {
      sc.stop()
    }
  }

}

猜你喜欢

转载自blog.csdn.net/fightingdog/article/details/83867577
今日推荐