Sample code: reading large JSON data with Spark Structured Streaming and analyzing it

Disclaimer: This is an original article by the blogger, published under the CC 4.0 BY-SA license. If you reproduce it, please attach a link to the original source along with this statement.
This link: https://blog.csdn.net/qq_40713537/article/details/101425809
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types.{LongType, StringType, StructType}


/**
 * Structured Streaming example: continuously reads JSON files from a
 * directory, filters the rows, and prints the results to the console.
 *
 * Author Vincer
 * Date 2019/09/26 10:10
 */
object StreamingDS {
    def main(args: Array[String]): Unit = {
        
        // Create the SparkSession (local mode, using all available cores).
        val spark: SparkSession = SparkSession
            .builder()
            .master("local[*]")
            .appName("StreamingDS")
            .getOrCreate()
        
        // Define the schema explicitly: streaming JSON sources require a
        // user-supplied schema (Spark will not infer it by default).
        val peopleSchema: StructType = new StructType()
            .add("name", StringType)
            .add("age", LongType)
            .add("sex", StringType)
        
        // Source: watch the directory for new JSON files.
        // NOTE(review): hard-coded Windows path — parameterize for real use.
        val peopleDF: DataFrame = spark.readStream
            .schema(peopleSchema)
            .json("E:\\tmp")
        
        // Filter: keep only people older than 20.
        // `val` (not `var`) — the DataFrame reference is never reassigned.
        val df = peopleDF.select("name", "age", "sex").where("age>20")
        
        // Sink: append newly arriving filtered rows to the console and
        // block until the streaming query terminates.
        df.writeStream
            .outputMode("append") // append mode: emit only new rows
            .format("console")    // print results to the console
            .start()
            .awaitTermination()
        
    }
}

 

You may also like

Origin blog.csdn.net/qq_40713537/article/details/101425809