介绍
Flume 安装:
https://blog.csdn.net/huonan_123/article/details/86678753
Kafka安装:
https://blog.csdn.net/huonan_123/article/details/86660666
依赖
<!--spark streaming Dependency-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<!--kafka dependency-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
编码
- spark streaming 编码
/**
 * Entry point: receiver-based Spark Streaming word count over a Kafka topic.
 *
 * Reads records from the `kafkasparkstreaming` topic via the old
 * spark-streaming-kafka-0-8 receiver API, splits each record value on commas,
 * and prints a per-batch word count every 10 seconds.
 */
def main(args: Array[String]): Unit = {
  System.setProperty("hadoop.home.dir", "D:\\hadoop")
  // Set up the streaming context. local[2] is required: one thread is
  // consumed by the Kafka receiver, leaving at least one for processing.
  val conf = new SparkConf().setMaster("local[2]").setAppName("ReceiverKafkaApp")
  val ssc = new StreamingContext(conf, Seconds(10))

  val topic = "kafkasparkstreaming"
  val numPartitions = 1
  val zkQuorum = "hadoop000:2181/kafka"
  val groupId = "test" // was `var` but never reassigned; `val` is correct

  // Map of topic name -> number of receiver threads for that topic.
  val topics = topic.split(",").map((_, numPartitions)).toMap

  // Receiver-based stream: each element is a (key, value) pair.
  val messages = KafkaUtils.createStream(ssc, zkQuorum, groupId, topics)

  messages.map(_._2)                 // keep only the record value
    .flatMap(_.split(","))           // tokenize on commas
    .map((_, 1))
    .reduceByKey(_ + _)
    .print()

  ssc.start()
  ssc.awaitTermination()
}
}
- kafka编码
bin/kafka-topics.sh \
--create \
--zookeeper hadoop000:2181/kafka \
--replication-factor 1 \
--partitions 1 \
--topic kafkasparkstreaming
- flume编码
[hadoop@hadoop000 conf]$ vim exec-kafka-streaming-memory-agent.conf
# Flume agent "a1": exec source tailing a log file -> memory channel -> Kafka sink.
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
# `tail -F` keeps following the file even across rotation/recreation.
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /home/hadoop/app/test_date/flume_test.log
#a1.sources.r1.bind = localhost
#a1.sources.r1.port = 44444
# Describe the sink
# Publishes each event to the topic the Spark Streaming job consumes.
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = kafkasparkstreaming
# NOTE(review): broker given by IP here while the rest of the document uses
# hostname hadoop000 -- confirm both refer to the same Kafka broker.
a1.sinks.k1.brokerList = 192.168.1.107:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
# NOTE(review): this channel binding is repeated in the "Bind the source and
# sink" section below; harmless duplication, one occurrence would suffice.
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
~
- 写入监听日志文件 /home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
[hadoop@hadoop000 bin]$ echo "ni hao a ">>/home/hadoop/app/test_date/flume_test.log
- kafka consumer消费情况
[hadoop@hadoop000 kafka]$ bin/kafka-console-consumer.sh --zookeeper hadoop000:2181/kafka --topic kafkasparkstreaming --from-beginning
Using the ConsoleConsumer with old consumer is deprecated and will be removed in a future major release. Consider using the new consumer by passing [bootstrap-server] instead of [zookeeper].
ni hao a
ni hao a
ni hao a
ni hao a
ni hao a
ni hao a
hello world
hello spark
ni hao a
ni hao a
ni hao a
ni hao a
ni hao a
ni hao a
ni hao a
ni hao a
- IDEA 控制台打印spark streaming 流
-------------------------------------------
Time: 1548819950000 ms
-------------------------------------------
(ni hao a ,6)
(hello world ,1)
(hello spark ,1)
(ruoze bigdata222ee33366663,1)
(ruoze bigdata222ee3333,1)