1. Add the following dependency to pom.xml:
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>${spark.version}</version>
    <scope>provided</scope>
</dependency>
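The ${spark.version} property must be defined in the pom's <properties> section; spark-core, which spark-streaming depends on, is pulled in transitively but is often declared explicitly as well. A minimal sketch, assuming Spark 2.1.1 (use whatever version matches your cluster):

<properties>
    <!-- 2.1.1 is only an example; match your cluster's Spark version -->
    <spark.version>2.1.1</spark.version>
</properties>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>${spark.version}</version>
    <scope>provided</scope>
</dependency>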
2. Code example
package com.luomk
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * @author luomingkui
 * @date 2018/6/16 9:31 PM
 * @desc Compute a word count with Spark Streaming.
 * To run: on the server, start a netcat listener with: nc -lk 9999
 * (if netcat is not installed, yum search nc locates the package),
 * then start this program. Type words on the server side and the
 * counts appear in this program's console.
 */
object WordCount {
  def main(args: Array[String]): Unit = {
    // Create the SparkConf; local[*] runs locally with all available cores
    val conf = new SparkConf().setAppName("Streaming").setMaster("local[*]")
    // Create the StreamingContext with a 5-second batch interval
    val ssc = new StreamingContext(conf, Seconds(5))
    // Create a socket receiver that yields the input as a DStream[String]
    val linesDStream = ssc.socketTextStream("hadoop102", 9999)
    // flatMap each line into words
    val wordDStream = linesDStream.flatMap(_.split(" "))
    // Map each word to a (word, 1) key-value pair
    val kvDStream = wordDStream.map((_, 1))
    // Sum the counts for identical words
    val result = kvDStream.reduceByKey(_ + _)
    result.print()
    // Start the computation and block until it terminates
    ssc.start()
    ssc.awaitTermination()
  }
}
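A sample session might look like the following (the host name and input line are illustrative, and DStream.print() prefixes each batch with a Time header whose timestamp will differ on your machine):

# On hadoop102:
$ nc -lk 9999
hello spark hello streaming

# In this program's console, in the 5-second batch that received the line:
-------------------------------------------
Time: 1529157095000 ms
-------------------------------------------
(hello,2)
(spark,1)
(streaming,1)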
Note: if the program produces too much log output while running, lower the log level in a log4j.properties file (e.g. to WARN; the sample below uses error for the root logger) and add the file to the src/main/resources directory:
log4j.rootLogger=error,stdout,R
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%t] %-c(line:%L) : %m%n
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=spark.log
log4j.appender.R.MaxFileSize=1024KB
log4j.appender.R.MaxBackupIndex=1
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%t] %-c(line:%L) : %m%n
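With the root logger at error, Spark's INFO/WARN chatter is suppressed, but the word counts still appear: DStream.print() writes to standard output directly rather than through log4j.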