Big Data Learning -- Spark Streaming and Kafka Integration for Real-Time Site Click-Stream Statistics

1. Install and configure zk
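A minimal conf/zoo.cfg for the three-node ensemble used throughout this post (mini1/mini2/mini3) might look like the following; the dataDir path is an assumption, adjust it to your install:

tickTime=2000
initLimit=10
syncLimit=5
# assumed data directory; each node also needs a $dataDir/myid file (1, 2, or 3)
dataDir=/root/apps/zookeeper/data
clientPort=2181
server.1=mini1:2888:3888
server.2=mini2:2888:3888
server.3=mini3:2888:3888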

2. Install and configure Kafka
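In Kafka's config/server.properties, each broker needs a unique id plus the ZooKeeper connect string. A sketch for mini1 (log.dirs is an assumption, and the listeners syntax assumes Kafka 0.9+; on 0.8 use port/host.name instead):

# unique per broker: 0 on mini1, 1 on mini2, 2 on mini3
broker.id=0
listeners=PLAINTEXT://mini1:9092
# assumed log directory
log.dirs=/root/apps/kafka/kafka-logs
zookeeper.connect=mini1:2181,mini2:2181,mini3:2181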

3. Start zk
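ZooKeeper has to be started on every node of the ensemble, then verified:

[root@mini1 zookeeper]# bin/zkServer.sh start
[root@mini1 zookeeper]# bin/zkServer.sh status   # expect one leader, the rest followers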

4. Start Kafka
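Likewise start a broker on every node; -daemon keeps it running in the background:

[root@mini1 kafka]# bin/kafka-server-start.sh -daemon config/server.properties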

5. Create the topic and send test messages
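The topic must exist before anything is written to it. With this ZooKeeper-based setup, creation looks like the following (replication factor and partition count are assumptions, pick what fits your cluster):

[root@mini3 kafka]# bin/kafka-topics.sh --create --zookeeper mini1:2181 --replication-factor 2 --partitions 3 --topic cyf-test

A console producer can then feed test messages into the topic: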

[root@mini3 kafka]# bin/kafka-console-producer.sh --broker-list mini1:9092 --topic cyf-test

 

Code:

package org.apache.spark

import org.apache.spark.HashPartitioner
import org.apache.spark.SparkConf
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.kafka.KafkaUtils

object KafkaWordCount {

  // For each word, add this batch's counts (y) to the previous running
  // total (z, which is None the first time a word is seen) and emit the
  // new cumulative count.
  val updateFunction = (iter: Iterator[(String, Seq[Int], Option[Int])]) => {
    iter.flatMap { case (x, y, z) => Some(y.sum + z.getOrElse(0)).map(v => (x, v)) }
  }

  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local[2]").setAppName("KafkaWordCount")
    val ssc = new StreamingContext(conf, Seconds(5))
    // A checkpoint directory is required to back the state kept by updateStateByKey.
    // For local testing, checkpoint to the working directory:
    // ssc.checkpoint("./")
    // Here the checkpoint is written to HDFS instead:
    ssc.checkpoint("hdfs://mini1:9000/kafkatest")

    // In a real deployment these would come from the command line:
    // val Array(zkQuorum, groupId, topics, numThreads) = args
    val Array(zkQuorum, groupId, topics, numThreads) = Array[String]("mini1:2181,mini2:2181,mini3:2181", "g1", "cyf-test", "2")
    // one entry per topic: how many receiver threads consume it
    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap
    // createStream yields (key, message) pairs; keep only the message payload
    val lines = KafkaUtils.createStream(ssc, zkQuorum, groupId, topicMap).map(_._2)
    // count words per batch, then fold them into the running totals
    val results = lines.flatMap(_.split(" ")).map((_, 1)).updateStateByKey(updateFunction, new HashPartitioner(ssc.sparkContext.defaultParallelism), true)


    results.print()
    ssc.start()
    ssc.awaitTermination()
  }

}
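Because the master is hard-coded to local[2], the job can be run straight from the IDE; typing words into the console producer above makes each 5-second batch print the cumulative counts. Packaged as a jar it would be submitted along these lines (the jar name is an assumption, and the spark-streaming-kafka dependency must be bundled in or supplied via --jars):

[root@mini1 spark]# bin/spark-submit --class org.apache.spark.KafkaWordCount \
  --master local[2] kafka-wordcount.jar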

 

Problems encountered along the way are written up at https://www.cnblogs.com/feifeicui/p/11018761.html
