SVM(Support Vector Machine,支持向量机)是一种常见的判别方法。在机器学习领域,它是一个有监督的学习模型,通常用来进行模式识别、分类以及回归分析。下面是 Spark 官网给出的例子,原网址为 http://spark.apache.org/docs/latest/mllib-linear-methods.html#classification
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.util.MLUtils
/**
 * Example driver: trains a linear SVM with SGD on a LIBSVM-formatted data set,
 * prints the area under the ROC curve measured on a held-out split, and then
 * saves the trained model and loads it back.
 */
object spark_svm {

  /**
   * Runs the example end to end against a local Spark master.
   *
   * The SparkContext is always stopped on exit, including when any step throws.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)
    try {
      // Load the training data, which is stored in LIBSVM format.
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

      // Split into a training set (60%) and a test set (40%) with a fixed seed.
      val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      val training = splits(0).cache()
      val test = splits(1)

      // Train the model.
      val numIterations = 100
      val model = SVMWithSGD.train(training, numIterations)

      // Clear the default threshold so that predict yields raw scores,
      // which is what the ROC computation below needs.
      model.clearThreshold()

      // Score every point of the test set, pairing each score with its true label.
      val scoreAndLabels = test.map { point =>
        val score = model.predict(point.features)
        (score, point.label)
      }

      // Compute the evaluation metric.
      val metrics = new BinaryClassificationMetrics(scoreAndLabels)
      val auROC = metrics.areaUnderROC()
      println(s"Area under ROC = $auROC")

      // Demonstrate saving the model and loading it back.
      // NOTE(review): save is expected to fail if the target path already exists
      // (e.g. on a second run) — confirm and clean the directory between runs.
      model.save(sc, "target/tmp/scalaSVMWithSGDModel")
      val sameModel = SVMModel.load(sc, "target/tmp/scalaSVMWithSGDModel")

      // Block for 15 minutes (30 * 30 * 1000 ms) before shutting down.
      // NOTE(review): presumably this keeps the local Spark UI reachable for
      // inspection — confirm it is still wanted before removing.
      Thread.sleep(30 * 30 * 1000)
    } finally {
      // Release the context's resources even when one of the steps above fails.
      sc.stop()
    }
  }
}