Kafka Integration with Spark

Dependencies

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka_2.10</artifactId>
    <version>1.6.2</version>
</dependency>
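
The spark-streaming-kafka artifact brings in only the Kafka integration; the application also needs spark-streaming itself (which pulls in spark-core), typically with provided scope when the job is submitted with spark-submit. The _2.10 suffix means the project must be built against Scala 2.10. A sketch of the extra Maven entry:

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.10</artifactId>
    <version>1.6.2</version>
    <scope>provided</scope>
</dependency>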

Code

import kafka.serializer.StringDecoder

import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

object KafkaWordCount {
   def main(args: Array[String]) {
      val sparkConf = new SparkConf().setAppName("KafkaWordCount")
      // 2-second micro-batches
      val ssc = new StreamingContext(sparkConf, Seconds(2))
      ssc.checkpoint("checkpoint")

      // Direct (receiver-less) stream: connect straight to the brokers, no ZooKeeper
      val kafkaParams = Map("metadata.broker.list" -> "localhost:9092,localhost:9093,localhost:9094")
      val topics = Set("topip-lcc")

      // createDirectStream yields (key, value) pairs; keep only the message value
      val lines = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics).map(_._2)
      val words = lines.flatMap(_.split("\t"))
      val wordCountsDS = words.map(x => (x, 1))
         .reduceByKey(_ + _)
      wordCountsDS.print()

      ssc.start()
      ssc.awaitTermination()
   }
}
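
To feed the topic from code rather than from the console, here is a minimal producer sketch using the org.apache.kafka.clients.producer API (the object name is arbitrary); the broker list and the topip-lcc topic match the consumer above, and the tab-separated payload matches its split("\t"):

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}

object KafkaWordCountProducer {
   def main(args: Array[String]) {
      val props = new Properties()
      props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092,localhost:9093,localhost:9094")
      props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")
      props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer")

      val producer = new KafkaProducer[String, String](props)
      // Tab-separated words, so the consumer's split("\t") yields three tokens
      producer.send(new ProducerRecord[String, String]("topip-lcc", "hello\tworld\thello"))
      producer.close()
   }
}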

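To try it end to end: submit the job, then type tab-separated words into the console producer and watch the counts print every two seconds. The jar name below is a placeholder for whatever your build produces:

   spark-submit --class KafkaWordCount --master local[2] kafka-wordcount.jar

   kafka-console-producer.sh --broker-list localhost:9092 --topic topip-lcc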

Another example: https://www.w3cschool.cn/apache_kafka/apache_kafka_integration_spark.html

Reposted from blog.csdn.net/qq_21383435/article/details/80624472