Spark Streaming 与 Kafka 整合

package com.test.spark
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
/**
 * Minimal Spark Streaming job that reads from Kafka through the 0-10 direct
 * stream API and prints, for every 5-second batch, the raw (key, value) pairs
 * and a word count over the message values.
 */
object SparkKafKaTest {

  /**
   * Entry point.
   *
   * @param args optional positional arguments; each one falls back to the
   *             value that used to be hard-coded when it is absent:
   *             - args(0): Kafka bootstrap servers (default `localhost:9092`)
   *             - args(1): consumer group id
   *             - args(2): comma-separated topic list (default `test`)
   */
  def main(args: Array[String]): Unit = {
    val brokers = args.lift(0).getOrElse("localhost:9092")
    val groupId = args.lift(1).getOrElse("use_a_separate_group_id_for_each_stream")
    val topics = args.lift(2).getOrElse("test").split(",").map(_.trim).filter(_.nonEmpty)
    require(topics.nonEmpty, "at least one Kafka topic must be given")

    // setIfMissing keeps local[2] as the default (e.g. when run from an IDE)
    // without overriding a master supplied through spark-submit --master.
    val sparkConf = new SparkConf()
      .setAppName("SparkKafKaTest")
      .setIfMissing("spark.master", "local[2]")

    val ssc = new StreamingContext(sparkConf, Seconds(5))

    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> brokers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> groupId,
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Raw records of each batch.
    stream.map(record => (record.key, record.value)).print()

    // Word count per batch; records with a null value are skipped instead of
    // failing the batch with a NullPointerException in split.
    stream
      .flatMap(record => Option(record.value).toList.flatMap(_.split(" ")))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    // Optional per-batch message count:
    // stream.map(_.value).count().print()

    // Nothing runs until the context is started; then block until it stops.
    ssc.start()
    ssc.awaitTermination()
  }
}

1. Maven 依赖

 <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
<dependency>
      <groupId>com.fasterxml.jackson.module</groupId>
      <artifactId>jackson-module-scala_2.11</artifactId>
      <version>${jackson.version}</version>
    </dependency>

    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-core</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-annotations</artifactId>
      <version>${jackson.version}</version>
    </dependency>

http://www.waitingfy.com/archives/4255

2. 用 spark-submit 提交

# Submit the packaged job to a local master with two threads.
# 'local[2]' is quoted so the shell cannot interpret [2] as a glob pattern.
# --packages resolves the Kafka 0-10 integration artifact at submit time.
# The trailing values are handed to main(args); presumably Kafka bootstrap
# server, consumer group id and topic. Confirm against the application.
spark-submit \
  --class com.test.spark.SparkKafKaTest \
  --master 'local[2]' \
  --packages org.apache.spark:spark-streaming-kafka-0-10_2.11:2.3.0 \
  ./target/spartktrain-1.0.jar \
  localhost:9092 group1 test
 

猜你喜欢

转载自blog.csdn.net/fox64194167/article/details/80737477