Spark Streaming 实时计数

● 在Linux终端窗口可以直接使用yum工具进行安装:

[root@hadoop-01 ~]# yum install -y nc

● 发送数据

[root@hadoop-01 ~]# nc -lk 8866

● 使用Streaming实时计数

package com.ws.streaming
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Milliseconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Real-time word count: reads lines from a netcat socket and prints
  * per-batch word counts to the console.
  */
object StreamingWc {

  def main(args: Array[String]): Unit = {

    // Batch jobs create a SparkContext; real-time computation uses a
    // StreamingContext layered on top of it.
    val conf = new SparkConf().setAppName("StreamingWc").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // StreamingContext wraps the SparkContext and adds streaming support.
    // The second argument is the micro-batch interval.
    val ssc = new StreamingContext(sc, Milliseconds(5000))

    // Receive lines of text from the netcat server started on hadoop-01:8866.
    val lines: ReceiverInputDStream[String] = ssc.socketTextStream("hadoop-01", 8866)

    // Split each line into words, pair every word with 1, then sum the
    // counts per word within each micro-batch.
    val counts: DStream[(String, Int)] = lines
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)

    // Print the first elements of every batch's result to stdout.
    counts.print()

    // Start the streaming computation...
    ssc.start()
    // ...and block the main thread until the job is stopped or fails.
    ssc.awaitTermination()
  }
}

猜你喜欢

转载自blog.csdn.net/bb23417274/article/details/82976366