Spark MLlib(七)用Spark实现LogisticRegression

Logistic回归又称Logistic回归分析,是一种广义线性回归分析模型,常用于数据挖掘、疾病自动诊断、经济预测等领域。以下是Spark MLlib中该算法的实现方式,原文地址为 http://spark.apache.org/docs/latest/mllib-linear-methods.html#classification

package alg

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils

/**
 * Example driver that trains a logistic regression classifier with
 * `LogisticRegressionWithLBFGS`, evaluates it on a held-out split and saves
 * the resulting model.
 *
 * The SparkContext is always stopped when `main` finishes, whether the
 * pipeline succeeds or throws.
 */
object logisticRegression {

  /**
   * Runs the load / split / train / evaluate / save pipeline on a local master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)

    // Release the context even if any step below fails; otherwise the
    // SparkContext (and its resources) would be leaked.
    try {
      // 1. Load the data set in LIBSVM format.
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

      // 2. Split into training (60%) and test (40%) sets with a fixed seed.
      val splitData = data.randomSplit(Array(0.6, 0.4), seed = 11L)

      // The training set is cached because the optimizer iterates over it.
      val training = splitData(0).cache()

      val test = splitData(1)

      // 3. Train the model.
      // NOTE(review): 10 classes mirrors the upstream Spark example; the sample
      // data set appears to be binary — confirm whether 2 is more appropriate.
      val model = new LogisticRegressionWithLBFGS()
        .setNumClasses(10)
        .run(training)

      // 4. Score the test set, pairing each prediction with its true label.
      val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
        val prediction = model.predict(features)
        (prediction, label)
      }

      // 5. Compute and report the overall accuracy.
      val metrics = new MulticlassMetrics(predictionAndLabels)

      val accuracy = metrics.accuracy

      println(s"Accuracy=$accuracy")

      // 6. Persist the trained model.
      // NOTE(review): presumably fails if the target directory already exists — confirm.
      model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel")
    } finally {
      sc.stop()
    }

  }

}

猜你喜欢

转载自blog.csdn.net/fightingdog/article/details/83930155
今日推荐