Generating cumulative data, including historical data

1. Use the updateStateByKey operator to accumulate historical records

Use streamingContext.checkpoint to persist the historical state; updateStateByKey will not run without a checkpoint directory.
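Before the full program, here is a minimal sketch of the operator's contract: Spark Streaming calls the update function once per key per batch, passing that key's new values as a Seq and its previous state as an Option, and keeps whatever Option the function returns as the new state. The parameter names and batch values below are made up for illustration.

def updateFunc(newValues: Seq[Int], state: Option[Int]): Option[Int] =
  Option(newValues.sum + state.getOrElse(0))

// Hypothetical batches for the key "spark":
updateFunc(Seq(1, 1, 1), Some(4))  // => Some(7): 3 new occurrences + 4 historical
updateFunc(Seq(1), None)           // first time the key appears: no state yet => Some(1)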

2. Code implementation

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.DStream

object sparkStreaming02 {

    // Accumulates the historical results: this batch's counts plus the saved state
    // inputSum:  values observed for a key in the current batch
    // resultSum: accumulated count carried over from previous batches
    def updateFunc(inputSum: Seq[Int], resultSum: Option[Int]): Option[Int] = {
      val finalResult = inputSum.sum + resultSum.getOrElse(0)

      Option(finalResult)
    }

    def main(args: Array[String]): Unit = {
      /**
        * The update function takes two parameters:
        *   Seq    - the values arriving for a key in the current batch
        *   Option - the historical state retained for that key
        * It returns the new state as an Option.
        */
      val sparkContext = new SparkContext(new SparkConf().setAppName("hdfsStreaming").setMaster("local[4]"))
      sparkContext.setLogLevel("WARN")
      val streamingContext = new StreamingContext(sparkContext, Seconds(5))

      // Monitor an HDFS directory; each new file placed there is read as a batch of lines
      val streaming = streamingContext.textFileStream("hdfs://node01:8020/sparkStreaming")
      // Save the historical state under a checkpoint directory
      streamingContext.checkpoint("./check_point")

      val finalkey: DStream[(String, Int)] = streaming.flatMap(_.split(" ")).map((_, 1)).updateStateByKey(updateFunc)
      // Print the accumulated results
      finalkey.print()

      streamingContext.start()

      streamingContext.awaitTermination()
    }
}
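For a quick local test without an HDFS cluster to watch, the same pipeline can read from a socket instead. The sketch below is an assumed variant, not part of the original post: the object name, host/port, and checkpoint path are placeholders. Run `nc -lk 9999` in a terminal, type words into it, and the printed counts accumulate across batches.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object socketStateExample {

  // Same update function: fold this batch's counts into the historical total
  def updateFunc(inputSum: Seq[Int], resultSum: Option[Int]): Option[Int] =
    Option(inputSum.sum + resultSum.getOrElse(0))

  def main(args: Array[String]): Unit = {
    val sparkContext = new SparkContext(
      new SparkConf().setAppName("socketStateExample").setMaster("local[2]"))
    sparkContext.setLogLevel("WARN")
    val streamingContext = new StreamingContext(sparkContext, Seconds(5))

    // updateStateByKey requires a checkpoint directory for the state RDDs
    streamingContext.checkpoint("./check_point_socket")

    // Placeholder source: `nc -lk 9999` running on the same machine
    streamingContext.socketTextStream("localhost", 9999)
      .flatMap(_.split(" "))
      .map((_, 1))
      .updateStateByKey(updateFunc)
      .print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}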

Origin: blog.csdn.net/weixin_44429965/article/details/107416111