package com.home.spark.ml import org.apache.spark.SparkConf import org.apache.spark.ml.feature.VectorAssembler import org.apache.spark.ml.linalg.Vectors import org.apache.spark.sql.SparkSession /** * VectorAssembler是一种转换器,它将给定的多个列组合为单个向量列。 * 这对于将原始特征和由不同特征转换器生成的特征组合到单个特征向量中很有用,以便训练诸如逻辑回归和决策树之类的ML模型。 * * VectorAssembler接受以下输入列类型:所有数字类型,布尔类型和向量类型。在每一行中,输入列的值将按指定顺序连接到向量中。 **/ object Ex_VectorAssembler { def main(args: Array[String]): Unit = { val conf: SparkConf = new SparkConf(true).setMaster("local[2]").setAppName("spark ml") val spark = SparkSession.builder().config(conf).getOrCreate() val dataset = spark.createDataFrame( Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0)) ).toDF("id", "hour", "mobile", "userFeatures", "clicked") val assembler = new VectorAssembler() .setInputCols(Array("hour", "mobile", "userFeatures")) .setOutputCol("features") val output = assembler.transform(dataset) println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'") output.select("*").show(false) spark.stop() } }
Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'
+---+----+------+--------------+-------+-----------------------+
|id |hour|mobile|userFeatures |clicked|features |
+---+----+------+--------------+-------+-----------------------+
|0 |18 |1.0 |[0.0,10.0,0.5]|1.0 |[18.0,1.0,0.0,10.0,0.5]|
+---+----+------+--------------+-------+-----------------------+
Spark VectorAssembler 向量装配转换器
猜你喜欢
转载自www.cnblogs.com/asker009/p/12206704.html
今日推荐
周排行