Spark Streaming + Kafka integration guide: createDirectStream

 

Start ZooKeeper: zkServer.sh start

Start Kafka: kafka-server-start.sh $KAFKA_HOME/config/server.properties

Create a topic: kafka-topics.sh --create --zookeeper node1:2181 --replication-factor 1 --partitions 1 --topic test

Start a producer: kafka-console-producer.sh --broker-list node1:9092 --topic test

Test run the code:

package com.lin.spark

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe

/**
  * Created by Administrator on 2019/6/7.
  *
  * Minimal Spark Streaming job that subscribes to the Kafka topic `test`
  * through the kafka-0-10 direct stream API. Every 5-second batch prints each
  * (key, value) record and then the offset range consumed for that batch.
  */
object Halo {
  /**
    * Entry point: builds the streaming context, wires up the Kafka direct
    * stream and blocks until the streaming job terminates.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // Consumer configuration handed to the Kafka client.
    // NOTE(review): the Spark integration guide recommends disabling
    // "enable.auto.commit" and committing offsets only after the batch output
    // has succeeded; kept as `true` here to preserve the original behavior.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "node1:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )

    val conf = new SparkConf().setAppName("Halo").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))

    val topics = Array("test")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    stream.foreachRDD { rdd =>
      // Read the offset ranges before any transformation: the cast is applied
      // to the RDD handed over by the direct stream itself, not to `mapped`.
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      val mapped: RDD[(String, String)] = rdd.map(record => (record.key, record.value))
      // computation logic
      mapped.foreach(println)
      // loop output
      for (o <- offsetRanges) {
        println(s"${o.topic}  ${o.partition} ${o.fromOffset} ${o.untilOffset}")
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}

 

References:

http://spark.apache.org/docs/2.2.0/streaming-kafka-0-10-integration.html

https://cloud.tencent.com/developer/article/1355430

Guess you like

Origin www.cnblogs.com/linkmust/p/10990848.html