● 在Linux终端窗口可以直接使用yum工具进行安装:
[root@hadoop-01 ~]# yum install -y nc
● 使用nc监听8866端口并发送数据(在该终端输入的内容会被发送给连接的Streaming程序)
[root@hadoop-01 ~]# nc -lk 8866
● 使用Streaming实时计数
package com.ws.streaming
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Milliseconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
*实时计数
*/
/**
 * Real-time word count: reads lines from a socket (nc server) and prints
 * per-batch word counts.
 */
object StreamingWc {

  def main(args: Array[String]): Unit = {
    // Batch jobs create a SparkContext; for real-time computation we wrap it
    // in a StreamingContext, which adds streaming capability on top of it.
    val conf = new SparkConf().setAppName("StreamingWc").setMaster("local[*]")
    val sparkContext = new SparkContext(conf)

    // The second argument is the micro-batch interval (one batch every 5000 ms).
    val streamingContext = new StreamingContext(sparkContext, Milliseconds(5000))

    // Receive raw text lines from the nc server on hadoop-01:8866.
    val lines: ReceiverInputDStream[String] = streamingContext.socketTextStream("hadoop-01", 8866)

    // Tokenize on spaces, pair each word with 1, and sum counts within each batch.
    val wordCounts: DStream[(String, Int)] =
      lines.flatMap(_.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)

    wordCounts.print()

    // Launch the streaming job, then block the driver until it terminates.
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}