Spark 以编程方式指定 DataFrame 的 Schema

package com.immooc.spark

import com.immooc.spark.ReflectionTest.Person
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SparkSession}

/**
 * Builds a DataFrame from a comma-separated text file by attaching an explicit
 * [[org.apache.spark.sql.types.StructType]] to an RDD of [[org.apache.spark.sql.Row]],
 * i.e. the schema is specified programmatically rather than derived from a case class.
 *
 * Each non-blank input line is expected to look like `name, age`.
 */
object ProgramSchema {

  /** Input file read when no path is supplied on the command line. */
  private val DefaultPeoplePath =
    "file:////usr/local/Cellar/spark-2.3.0/examples/src/main/resources/people.txt"

  /** Schema applied to the parsed rows: nullable string `name`, nullable int `age`. */
  private val PeopleSchema = StructType(Array(
    StructField("name", StringType, nullable = true),
    StructField("age", IntegerType, nullable = true)))

  /**
   * Reads the people file, converts it to a DataFrame using [[PeopleSchema]] and prints it.
   *
   * @param args optional; `args(0)`, when present, overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultPeoplePath)

    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("ProgramSchema")

    // Hand the SparkConf to the builder directly instead of creating a separate
    // SparkContext first and relying on getOrCreate() to pick it up.
    val spark = SparkSession
      .builder()
      .config(sparkConf)
      // NOTE(review): looks like a placeholder option copied from the Spark docs —
      // confirm nothing depends on it before removing.
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

    try {
      val peopleRDD = spark.sparkContext
        .textFile(inputPath)
        // Skip blank lines (e.g. a trailing newline); otherwise fields(1) below
        // would throw ArrayIndexOutOfBoundsException.
        .filter(_.trim.nonEmpty)
        .map(_.split(","))
        .map(fields => Row(fields(0), fields(1).trim.toInt))

      val peopleDF = spark.createDataFrame(peopleRDD, PeopleSchema)
      peopleDF.show()
    } finally {
      // Always release the session, even when reading or parsing fails.
      spark.close()
    }
  }

}

http://www.waitingfy.com/archives/4325

猜你喜欢

转载自blog.csdn.net/fox64194167/article/details/80737504
今日推荐