flume收集日志

flume收集日志
# Flume test configuration for a single agent named a1.
# Data flow: Avro source r1 -> memory channels c1 and c2
#            c1 -> HDFS sink k1, c2 -> Kafka sink k2.
# Every sink must be listed in a1.sinks and bound to a channel, otherwise
# Flume ignores its settings; k2 is therefore declared and wired explicitly.

# Agent components
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1 c2


# Source: Avro RPC listener on all interfaces, port 44444
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 44444
# Interceptor i1 extracts the event time from the log line.
# The regex skips the first 14 '|'-delimited fields and captures the last
# digit run of the 15th field (shape: digits_digits_digits); the captured
# value is stored in the event header "timestamp", which the HDFS sink uses
# to resolve %Y-%m-%d in its path.
# Backslashes are doubled because this is a Java properties file.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = regex_extractor
a1.sources.r1.interceptors.i1.regex = ^(?:[^\\|]*\\|){14}\\d+_\\d+_(\\d+)\\|.*$
a1.sources.r1.interceptors.i1.serializers = s1
a1.sources.r1.interceptors.i1.serializers.s1.name = timestamp


# Sink k1: HDFS, one directory per report day
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hadoop01:9000/flux/reportTime=%Y-%m-%d
# Write events as a plain data stream
a1.sinks.k1.hdfs.fileType = DataStream
# Roll files every 30 seconds; 0 disables size- and count-based rolling
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollCount = 0
# Time zone used when resolving the date escapes in hdfs.path
a1.sinks.k1.hdfs.timeZone = GMT+8


# Sink k2: Kafka, publishes events to topic "netflow"
a1.sinks.k2.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k2.brokerList = hadoop01:9092,hadoop02:9092,hadoop03:9092
a1.sinks.k2.topic = netflow


# Channels: one in-memory channel per sink so that each sink receives
# every event (two sinks sharing one channel would split the events)
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000
a1.channels.c2.transactionCapacity = 100


# Bindings: the source writes to both channels (Flume's default channel
# selector replicates each event); each sink drains its own channel
a1.sources.r1.channels = c1 c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2




# Client-side agent a1.
# Data flow: Avro source r1 -> memory channel c1 -> sink group g1, which
# load-balances across three Avro sinks (Park01, Park02, Park03).

# Agent components
a1.sources = r1
a1.channels = c1
a1.sinks = k1 k2 k3


# Channel: in-memory buffer between the source and the sinks
a1.channels.c1.type = memory
a1.channels.c1.transactionCapacity = 100
a1.channels.c1.capacity = 1000


# Source: Avro RPC listener on all interfaces, port 44444
a1.sources.r1.type = avro
a1.sources.r1.port = 44444
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.channels = c1

# Interceptor i1: the regex skips the first 14 '|'-delimited fields and
# captures the last digit run of the 15th field (digits_digits_digits).
# The captured value is stored in the event header named "timestamp".
# Backslashes are doubled because this is a Java properties file.
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = regex_extractor
a1.sources.r1.interceptors.i1.serializers = s1
a1.sources.r1.interceptors.i1.serializers.s1.name = timestamp
a1.sources.r1.interceptors.i1.regex = ^(?:[^\\|]*\\|){14}\\d+_\\d+_(\\d+)\\|.*$


# Sinks: each forwards events over Avro to port 44444 on one host
a1.sinks.k1.type = avro
a1.sinks.k1.channel = c1
a1.sinks.k1.hostname = Park01
a1.sinks.k1.port = 44444

a1.sinks.k2.type = avro
a1.sinks.k2.channel = c1
a1.sinks.k2.hostname = Park02
a1.sinks.k2.port = 44444

a1.sinks.k3.type = avro
a1.sinks.k3.channel = c1
a1.sinks.k3.hostname = Park03
a1.sinks.k3.port = 44444


# Sink group g1: pick one of k1/k2/k3 at random for each batch and
# back off from sinks that fail
a1.sinkgroups = g1
a1.sinkgroups.g1.sinks = k1 k2 k3
a1.sinkgroups.g1.processor.type = load_balance
a1.sinkgroups.g1.processor.selector = random
a1.sinkgroups.g1.processor.backoff = true


# Central-server agent a1.
# Data flow: Avro source r1 -> memory channels c1 and c2
#            c1 -> HDFS sink k1, c2 -> Kafka sink k2.

# Agent components
a1.sources = r1
a1.channels = c1 c2
a1.sinks = k1 k2


# Channels: one in-memory channel per sink
a1.channels.c1.type = memory
a1.channels.c1.transactionCapacity = 100
a1.channels.c1.capacity = 1000

a1.channels.c2.type = memory
a1.channels.c2.transactionCapacity = 100
a1.channels.c2.capacity = 1000


# Source: Avro RPC listener on all interfaces, port 44444.
# It writes to both channels; Flume's default channel selector replicates
# each event, so both sinks receive every event.
a1.sources.r1.type = avro
a1.sources.r1.port = 44444
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.channels = c1 c2


# Sink k1: HDFS, one directory per report day.
# %Y-%m-%d is resolved from the event's "timestamp" header, interpreted
# in the configured time zone.
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs://ns/flux/reportTime=%Y-%m-%d
a1.sinks.k1.hdfs.timeZone = GMT+8
# Write events as a plain data stream
a1.sinks.k1.hdfs.fileType = DataStream
# Roll files every 30 seconds; 0 disables count- and size-based rolling
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 0


# Sink k2: Kafka, publishes events to topic "netflow"
a1.sinks.k2.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k2.channel = c2
a1.sinks.k2.topic = netflow
a1.sinks.k2.brokerList = Park01:9092,Park02:9092,Park03:9092


括号在正则表达式中的作用主要有两方面:
1. 分组:将一段正则内容作为一组,使操作符(例如量词 {14})对整组内容起作用。以 (?: 开头的括号只分组、不捕获。
2. 捕获:将一段内容作为捕获组进行捕获。上面配置中的 (\d+) 捕获的就是时间戳,并被写入事件头 timestamp。


http://127.0.0.1/demo/b.jsp|b.jsp|页面B|UTF-8|1024x768|24-bit|zh-cn|1|1|18.0 r0|0.7864694688469172|http://127.0.0.1/demo/a.jsp|Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36|84488155274825025429|8736267587_6_1495160530159|0:0:0:0:0:0:0:1

猜你喜欢

转载自blog.csdn.net/vitaair/article/details/80220621