3. Structured Streaming 版本的单词统计案例
package com.xiaofan.ss
import org.apache.spark.sql.functions._
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.streaming.StreamingQuery
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
/**
 * Structured Streaming word-count demo.
 *
 * Reads lines of text from a TCP socket, splits them into words, keeps a
 * running count per word and prints the complete result table to the console
 * every time it is updated.
 *
 * Usage: `StructuredStreamingDemo [host] [port]`. Both arguments are optional
 * and default to `192.168.1.27` and `9999`.
 */
object StructuredStreamingDemo {
  // Set the log level: only WARN and above for loggers under "org".
  Logger.getLogger("org").setLevel(Level.WARN)

  /** Socket host used when no host argument is supplied. */
  private val DefaultHost: String = "192.168.1.27"

  /** Socket port used when no port argument is supplied. */
  private val DefaultPort: Int = 9999

  /** Parses a port argument, failing with a readable message when it is not an integer. */
  private def parsePort(raw: String): Int =
    scala.util.Try(raw.trim.toInt).getOrElse(
      throw new IllegalArgumentException(s"Invalid port '$raw': expected an integer")
    )

  /**
   * Program entry point.
   *
   * @param args optional `[host, port]` of the socket to read from; missing
   *             values fall back to the defaults above
   * @throws IllegalArgumentException if a port argument is given but is not an integer
   */
  def main(args: Array[String]): Unit = {
    val host: String = args.headOption.getOrElse(DefaultHost)
    val port: Int = args.lift(1).fold(DefaultPort)(parsePort)

    val spark = SparkSession
      .builder
      .appName("StructuredNetworkWordCount")
      .master("local[2]")
      .getOrCreate()
    import spark.implicits._

    // DataFrame representing the stream of input lines read from host:port (the input table).
    val lines: DataFrame = spark.readStream
      .format("socket")
      .option("host", host)
      .option("port", port.toLong)
      .load()

    // Split on runs of whitespace and drop empty tokens, so that blank lines and
    // leading or repeated spaces do not end up counted as an empty-string "word".
    val words: Dataset[String] = lines
      .as[String]
      .flatMap(_.split("\\s+").filter(_.nonEmpty))

    // Running word count (the result table).
    val wordCounts: DataFrame = words.groupBy("value").count()

    // Start the query that prints the running counts to the console.
    val query: StreamingQuery = wordCounts.writeStream
      .outputMode("complete")
      .format("console")
      .start()

    // Block until the streaming query terminates.
    query.awaitTermination()
  }
}
4. 项目部分(待完善)