Spark Streaming 监控HDFS目录

package org.lm.spark.streaming

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Streaming word count over text files that appear in a monitored directory.
 *
 * Every batch interval the job reads the files picked up by `textFileStream`,
 * splits each line on single spaces, counts the occurrences of every non-empty
 * word within that batch, and prints the first results to stdout.
 *
 * Usage: `StreamingWordCountOnLine [inputDir] [batchSeconds]`
 *
 *  - `inputDir`     directory to monitor; defaults to
 *                   `hdfs://192.168.189.128:9000/user/StreamingText`
 *  - `batchSeconds` batch interval in whole seconds (> 0); defaults to 10
 */
object StreamingWordCountOnLine {

  /** Master URL used only when none is supplied externally (e.g. `spark-submit --master`). */
  private val DefaultMaster = "spark://192.168.189.128:7077"

  /** Directory monitored when no path argument is given. */
  private val DefaultInputDir = "hdfs://192.168.189.128:9000/user/StreamingText"

  /** Batch interval, in seconds, used when no interval argument is given. */
  private val DefaultBatchSeconds = 10L

  /**
   * Entry point: builds the streaming pipeline, starts it, and blocks until it terminates.
   *
   * @param args optional `inputDir` followed by optional `batchSeconds`
   * @throws IllegalArgumentException if `batchSeconds` is not a positive integer
   */
  def main(args: Array[String]): Unit = {
    val inputDir = args.headOption.getOrElse(DefaultInputDir)
    val batchSeconds = args.lift(1) match {
      case None => DefaultBatchSeconds
      case Some(raw) =>
        scala.util.Try(raw.trim.toLong).toOption.filter(_ > 0).getOrElse {
          throw new IllegalArgumentException(
            s"batchSeconds must be a positive whole number of seconds, got: '$raw'")
        }
    }

    // setIfMissing (instead of setMaster) keeps the original default cluster while
    // letting a master passed through spark-submit / system properties take effect.
    val conf = new SparkConf()
      .setAppName("Streaming Word Count OnLine")
      .setIfMissing("spark.master", DefaultMaster)
    val ssc = new StreamingContext(conf, Seconds(batchSeconds))

    val lines = ssc.textFileStream(inputDir)
    // split(" ") emits empty tokens for blank lines and for leading or repeated
    // spaces; drop them so "" is never counted as a word.
    val words = lines.flatMap(_.split(" ")).filter(_.nonEmpty)
    val pairs = words.map(word => (word, 1))
    val wordCounts = pairs.reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }

}
发布了22 篇原创文章 · 获赞 6 · 访问量 3万+

猜你喜欢

转载自blog.csdn.net/lm19770429/article/details/78841248