Configuration prerequisites
Goals: 1.1 import the data into Kafka; 1.2 read the data with Flume. MySQL 5.7 must be installed first; see my previous article for the installation steps.
# 1 Unzip the archives and move them to the target locations
cd /opt/jar
tar -zxf flume-ng-1.6.0-cdh5.14.2.tar.gz
mv apache-flume-1.6.0-cdh5.14.2-bin/ /opt/soft/flume160
tar -zxf kafka_2.11-2.0.0.tgz
mv kafka_2.11-2.0.0 /opt/soft/kafka200

# 2 Edit the Kafka configuration file
vim /opt/soft/kafka200/config/server.properties
# set the following properties:
listeners=PLAINTEXT://192.168.64.210:9092
log.dirs=/opt/soft/kafka200/data
zookeeper.connect=192.168.64.210:2181

# 3 Edit the Flume configuration file
cd /opt/soft/flume160/conf
cp flume-env.sh.template flume-env.sh
vim /opt/soft/flume160/conf/flume-env.sh
# set the JDK path:
export JAVA_HOME=/opt/soft/jdk180

# 4 Configure environment variables
vim /etc/profile
#kafka env
export KAFKA_HOME=/opt/soft/kafka200
export PATH=$PATH:$KAFKA_HOME/bin
#flume env
export FLUME_HOME=/opt/soft/flume160
export PATH=$PATH:$FLUME_HOME/bin
# Activate the configuration
source /etc/profile

# 5 Start ZooKeeper
zkServer.sh start

# 6 Start Kafka
kafka-server-start.sh /opt/soft/kafka200/config/server.properties
# Create the topic with 3 partitions
kafka-topics.sh --create --zookeeper 192.168.64.210:2181 --topic event_attendees_raw --replication-factor 1 --partitions 3
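Before moving on, it is worth confirming that the topic really exists with the expected layout. A quick check (assuming the broker and ZooKeeper addresses above):

#Describe the topic: expect PartitionCount:3 and ReplicationFactor:1
kafka-topics.sh --zookeeper 192.168.64.210:2181 --describe --topic event_attendees_raw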
Configure the conf files
# Check the size of the source file
cd /opt/data/attendees/
ls
wc -l event_attendees.csv.COMPLETED
cat event_attendees.csv.COMPLETED

# First conf file (event_attendees.conf)
a1.channels = c1
a1.sources = s1
a1.sinks = k1
a1.sources.s1.type = spooldir
a1.sources.s1.channels = c1
a1.sources.s1.spoolDir = /opt/data/attendees
a1.sources.s1.deserializer.maxLineLength = 120000
a1.sources.s1.interceptors = i1
a1.sources.s1.interceptors.i1.type = regex_filter
a1.sources.s1.interceptors.i1.regex = event.*
a1.sources.s1.interceptors.i1.excludeEvents = true
a1.channels.c1.type = memory
a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = event_attendees_raw
a1.sinks.k1.kafka.bootstrap.servers = 192.168.64.210:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1

# Second conf file (user_friends.conf)
a1.channels = c1
a1.sources = s1
a1.sinks = k1
a1.sources.s1.type = spooldir
a1.sources.s1.channels = c1
a1.sources.s1.spoolDir = /opt/data/uf
a1.sources.s1.deserializer.maxLineLength = 60000
a1.sources.s1.interceptors = i1
a1.sources.s1.interceptors.i1.type = regex_filter
a1.sources.s1.interceptors.i1.regex = event.*
a1.sources.s1.interceptors.i1.excludeEvents = true
a1.channels.c1.type = memory
a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = user_friends_raw
a1.sinks.k1.kafka.bootstrap.servers = 192.168.64.210:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
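Both agents use a regex_filter interceptor with excludeEvents=true, which drops every record matching event.* (here, the CSV header line), so only data rows reach Kafka. A quick way to see what the regex will drop (a sketch, assuming the file path above):

#The header line of event_attendees.csv starts with "event", so the interceptor drops it
head -1 /opt/data/attendees/event_attendees.csv.COMPLETED
#Count the lines the regex would match; this should print 1 (the header only)
grep -c '^event' /opt/data/attendees/event_attendees.csv.COMPLETED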
# Check the number of lines in the source file
cd /opt/data/uf
wc -l user_friends.csv
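Since the interceptor drops the header line, the record count that eventually shows up in Kafka should be this line count minus one. A one-liner to compute the expected count:

#Expected number of Kafka records: total lines minus the header
echo $(( $(wc -l < user_friends.csv) - 1 ))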
Upload events to Kafka
# Third conf file (events.conf)
a1.channels = c1
a1.sources = s1
a1.sinks = k1
a1.sources.s1.type = spooldir
a1.sources.s1.channels = c1
a1.sources.s1.spoolDir = /opt/data/events
a1.sources.s1.interceptors = i1
a1.sources.s1.interceptors.i1.type = regex_filter
a1.sources.s1.interceptors.i1.regex = event.*
a1.sources.s1.interceptors.i1.excludeEvents = true
a1.channels.c1.type = memory
a1.sinks.k1.channel = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = events_raw
a1.sinks.k1.kafka.bootstrap.servers = 192.168.64.210:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1
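Note that only event_attendees_raw was created explicitly above. If the broker does not auto-create topics (auto.create.topics.enable=false), create events_raw and user_friends_raw by hand first, with the same layout:

kafka-topics.sh --create --zookeeper 192.168.64.210:2181 --topic events_raw --replication-factor 1 --partitions 3
kafka-topics.sh --create --zookeeper 192.168.64.210:2181 --topic user_friends_raw --replication-factor 1 --partitions 3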
Delete a message queue (topic)
#delete.topic.enable must be set to true before a topic can be deleted
vim /opt/soft/kafka200/config/server.properties
delete.topic.enable=true
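The broker only reads server.properties at startup, so restart Kafka after this change:

kafka-server-stop.sh
kafka-server-start.sh /opt/soft/kafka200/config/server.properties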
#List the topics
kafka-topics.sh --zookeeper 192.168.64.210:2181 --list
#Delete the topic
kafka-topics.sh --zookeeper 192.168.64.210:2181 --topic events_raw --delete
#View the file header
cd /opt/data/events
cat events.csv.COMPLETED | head -1
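To read the header more comfortably, print one column name per line (handy when writing the interceptor regex):

head -1 events.csv.COMPLETED | tr ',' '\n'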
Read in the events file
#Step 1: monitor Kafka with a console consumer
kafka-console-consumer.sh --bootstrap-server 192.168.64.210:9092 --topic events_raw

#Step 2: start Flume to write the file
flume-ng agent -n a1 -c /opt/soft/flume160/conf/ -f /opt/flumecfg/events.conf -Dflume.root.logger=INFO,console

#Step 3: view the record counts in Kafka
kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.64.210:9092 --topic events_raw

#List the topics
kafka-topics.sh --zookeeper 192.168.64.210:2181 --list

#Delete the topic
kafka-topics.sh --zookeeper 192.168.64.210:2181 --topic events_raw --delete

#View the deletion in the log directory
cd /opt/soft/kafka200/kafka-logs
ls
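GetOffsetShell prints one topic:partition:offset line per partition. As a sanity check, the offsets should sum to the source line count minus the dropped header. A rough comparison (a sketch, assuming the paths above):

#Sum the per-partition offsets
kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.64.210:9092 --topic events_raw | awk -F: '{sum += $3} END {print sum}'
#Expected value: line count of the source file minus the header row
echo $(( $(wc -l < /opt/data/events/events.csv.COMPLETED) - 1 ))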
Read in the user_friends file
#Step 1: monitor Kafka with a console consumer
kafka-console-consumer.sh --bootstrap-server 192.168.64.210:9092 --topic user_friends_raw

#Step 2: start Flume to write the file
flume-ng agent -n a1 -c /opt/soft/flume160/conf/ -f /opt/flumecfg/user_friends.conf -Dflume.root.logger=INFO,console

#Step 3: view the record counts in Kafka
kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.64.210:9092 --topic user_friends_raw

#List the topics
kafka-topics.sh --zookeeper 192.168.64.210:2181 --list

#Delete the topic
kafka-topics.sh --zookeeper 192.168.64.210:2181 --topic user_friends_raw --delete

#View the deletion in the log directory
cd /opt/soft/kafka200/kafka-logs
ls
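user_friends.csv has very long rows (hence maxLineLength=60000), and the default memory channel only buffers 100 events with a transaction size of 100, which can stall the agent on a large file. If the agent logs channel-full errors, enlarging the channel in user_friends.conf is one option (illustrative numbers, not tuned values):

a1.channels.c1.capacity = 10000
a1.channels.c1.transactionCapacity = 1000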
Read the event_attendees file
#Step 1: monitor Kafka with a console consumer
kafka-console-consumer.sh --bootstrap-server 192.168.64.210:9092 --topic event_attendees_raw

#Step 2: start Flume to write the file
flume-ng agent -n a1 -c /opt/soft/flume160/conf/ -f /opt/flumecfg/event_attendees.conf -Dflume.root.logger=INFO,console

#Step 3: view the record counts in Kafka
kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.64.210:9092 --topic event_attendees_raw

#List the topics
kafka-topics.sh --zookeeper 192.168.64.210:2181 --list

#Delete the topic
kafka-topics.sh --zookeeper 192.168.64.210:2181 --topic event_attendees_raw --delete

#View the deletion in the log directory
cd /opt/soft/kafka200/kafka-logs
ls
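As a final spot check, replay a few records from the beginning of the topic; --from-beginning and --max-messages are standard console-consumer flags:

kafka-console-consumer.sh --bootstrap-server 192.168.64.210:9092 --topic event_attendees_raw --from-beginning --max-messages 5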