14. Spark 学习之旅之 Structured Streaming(八)

1. Structured Streaming 官方文档

2. Spark Structured Streaming官方文档解读

3. Structured Streaming 版本的单词统计案例

package com.xiaofan.ss

import org.apache.spark.sql.functions._
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.streaming.StreamingQuery
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
 * Structured Streaming word count over a socket source.
 *
 * Reads lines of text from a socket, splits every line on single spaces and
 * prints the running count of each token to the console using the
 * "complete" output mode.
 *
 * Usage: `StructuredStreamingDemo [host] [port]`
 *
 * Both arguments are optional. When omitted, the host defaults to
 * `192.168.1.27` and the port to `9999`, which are the values this demo
 * originally had hard-coded.
 */
object StructuredStreamingDemo {
  // Set the log level of the "org" logger hierarchy to WARN.
  Logger.getLogger("org").setLevel(Level.WARN)

  /** Socket host used when no host argument is supplied. */
  private val DefaultHost: String = "192.168.1.27"

  /** Socket port used when no port argument is supplied. */
  private val DefaultPort: Int = 9999

  /**
   * Program entry point.
   *
   * @param args optional command-line arguments: `args(0)` is the socket host,
   *             `args(1)` is the socket port (must parse as an integer)
   * @throws NumberFormatException if a port argument is given but is not an integer
   */
  def main(args: Array[String]): Unit = {
    // Fall back to the defaults so running without arguments behaves as before.
    val host: String = if (args.length > 0) args(0) else DefaultHost
    val port: Int = if (args.length > 1) args(1).toInt else DefaultPort

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCount")
      .master("local[2]")
      .getOrCreate()

    import spark.implicits._
    // Create DataFrame representing the stream of input lines from the
    // connection to host:port (the input table).
    val lines: DataFrame = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port)
      .load()

    // Split every incoming line into tokens on single spaces.
    val words: Dataset[String] = lines.as[String].flatMap(_.split(" "))
    // Generate running word count (the result table).
    val wordCounts: DataFrame = words.groupBy("value").count()
    // Start running the query that prints the running counts to the console.
    val query: StreamingQuery = wordCounts.writeStream
      .outputMode("complete")
      .format("console")
      .start()

    // Block the main thread until the streaming query terminates.
    query.awaitTermination()
  }
}

4. 项目部分(待完善)

发布了85 篇原创文章 · 获赞 12 · 访问量 3723

猜你喜欢

转载自blog.csdn.net/fanjianhai/article/details/104456827
今日推荐