Logistic 回归又称 Logistic 回归分析,是一种广义的线性回归分析模型,常用于数据挖掘、疾病自动诊断、经济预测等领域。以下是 Spark 中该算法的实现方式,原文地址为 http://spark.apache.org/docs/latest/mllib-linear-methods.html#classification
package alg
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
/**
 * Example driver that trains a logistic regression classifier with
 * `LogisticRegressionWithLBFGS`, measures its accuracy on a held-out split
 * and saves the trained model.
 */
object logisticRegression {

  /**
   * Runs the whole pipeline: load a LIBSVM file, split it 60/40 into training
   * and test sets, train the model, print the test-set accuracy and save the model.
   *
   * The SparkContext is always stopped before this method returns, including
   * when one of the steps throws.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)
    try {
      // 1. Load the data (LIBSVM format).
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

      // 2. Split into training (60%) and test (40%) sets; the fixed seed keeps the
      //    split reproducible. Only the training set is cached.
      val splitData = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      val training = splitData(0).cache()
      val test = splitData(1)

      // 3. Train the model.
      // NOTE(review): the model is configured for 10 classes; confirm this matches
      // the label range of the input file.
      val model = new LogisticRegressionWithLBFGS()
        .setNumClasses(10)
        .run(training)

      // 4. Evaluate on the test set: pair each prediction with its true label.
      val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
        val prediction = model.predict(features)
        (prediction, label)
      }
      val metrics = new MulticlassMetrics(predictionAndLabels)
      val accuracy = metrics.accuracy
      println(s"Accuracy=$accuracy")

      // 5. Save the trained model.
      // NOTE(review): presumably this fails when the target directory already
      // exists (e.g. on a second run) — confirm and clear it beforehand if needed.
      model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel")
    } finally {
      // Release the context even when a step above fails.
      sc.stop()
    }
  }
}