[flink]#32_扩展库:Machine Learning

<!-- Maven dependency on the Apache Flink ML library (artifact flink-ml_2.11, version 1.8.0);
     the _2.11 suffix is presumably the Scala binary version - confirm against the project's Scala version. -->
<dependency> 
	<groupId>org.apache.flink</groupId> 
	<artifactId>flink-ml_2.11</artifactId> 
	<version>1.8.0</version>
</dependency>
// Placeholder inputs: labeled feature vectors for training, plain vectors for testing.
val trainingData: DataSet[LabeledVector] = ... 
val testingData: DataSet[Vector] = ...

// Labeled input data that will be divided into a training part and a testing part.
val dataSet: DataSet[LabeledVector] = ...

// Use Splitter to split the data set into training data and testing data.
// trainTestSplit returns a TrainTestDataSet wrapper (not a DataSet); its
// `training` and `testing` fields hold the two resulting data sets.
val trainTestData: TrainTestDataSet[LabeledVector] = Splitter.trainTestSplit(dataSet)
val trainingData: DataSet[LabeledVector] = trainTestData.training
// Drop the labels so the testing data contains only the feature vectors.
val testingData: DataSet[Vector] = trainTestData.testing.map(lv => lv.vector)

// Configure the multiple linear regression learner.
val mlr = MultipleLinearRegression()
  .setStepsize(1.0)
  .setIterations(100)
  .setConvergenceThreshold(0.001)

// Fit the model to the training data.
mlr.fit(trainingData)

// The fitted model can now be used to make predictions.
// NOTE(review): confirm the declared element type against what predict returns for DataSet[Vector] input.
val predictions: DataSet[LabeledVector] = mlr.predict(testingData)

Flink-ML Pipeline 使用

// Placeholder inputs: labeled vectors for training, plain vectors for prediction.
val trainingData: DataSet[LabeledVector] = ...
val testingData: DataSet[Vector] = ...

// The three pipeline stages; only the polynomial degree is set explicitly.
val scaler = StandardScaler()
val polyFeatures = PolynomialFeatures().setDegree(3)
val mlr = MultipleLinearRegression()

// Construct pipeline of standard scaler, polynomial features and multiple linear regression.
// The definition must sit on its own line: when it is fused onto the end of a comment,
// `pipeline` is never defined and the fit/predict calls below cannot compile.
val pipeline = scaler.chainTransformer(polyFeatures).chainPredictor(mlr)

// Train pipeline
pipeline.fit(trainingData)

// Calculate predictions
val predictions: DataSet[LabeledVector] = pipeline.predict(testingData)
发布了78 篇原创文章 · 获赞 0 · 访问量 1398

猜你喜欢

转载自blog.csdn.net/qq_30782921/article/details/103588432