1. Install and configure ZooKeeper
2. Install and configure Kafka
3. Start ZooKeeper
4. Start Kafka
5. Create the topic and start a console producer to feed it test messages:
[root@mini3 kafka]# bin/kafka-console-producer.sh --broker-list mini1:9092 --topic cyf-test
Code:
package org.apache.spark

import java.net.InetSocketAddress
import org.apache.spark.HashPartitioner
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.flume.FlumeUtils
import org.apache.spark.streaming.kafka.KafkaUtils

/**
 * Stateful word count over a Kafka topic using Spark Streaming's
 * receiver-based Kafka connector. Running totals are maintained across
 * batches with `updateStateByKey`, which requires a checkpoint directory.
 */
object KafkaWordCount {

  /**
   * State-update function for `updateStateByKey`.
   *
   * Each element of `iter` is (word, countsInThisBatch, previousTotal);
   * the result pairs each word with its new running total.
   */
  val updateFunction = (iter: Iterator[(String, Seq[Int], Option[Int])]) => {
    iter.flatMap { case (x, y, z) =>
      Some(y.sum + z.getOrElse(0)).map(v => (x, v))
    }
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("KafkaWordCount")
    val ssc = new StreamingContext(conf, Seconds(5))

    // updateStateByKey requires a checkpoint directory.
    // For local testing, checkpoint to the working directory:
    // ssc.checkpoint("./")
    // For cluster runs, checkpoint to HDFS:
    ssc.checkpoint("hdfs://mini1:9000/kafkatest")

    // In production, read these from the command line instead:
    // val Array(zkQuorum, groupId, topics, numThreads) = args
    val Array(zkQuorum, groupId, topics, numThreads) =
      Array[String]("mini1:2181,mini2:2181,mini3:2181", "g1", "cyf-test", "2")

    // Map of topic -> number of receiver threads consuming it.
    val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap

    // Each Kafka record arrives as (key, message); only the message is needed.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, groupId, topicMap).map(_._2)

    // Split into words, count per batch, and fold into the running state.
    val results = lines
      .flatMap(_.split(" "))
      .map((_, 1))
      .updateStateByKey(
        updateFunction,
        new HashPartitioner(ssc.sparkContext.defaultParallelism),
        true)

    results.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
For problems encountered along the way, see https://www.cnblogs.com/feifeicui/p/11018761.html