Collecting logs with Flume

# Flume test configuration
# Configure the agent
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1 c2


# Configure the source
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 44444
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = regex_extractor
a1.sources.r1.interceptors.i1.regex = ^(?:[^\\|]*\\|){14}\\d+_\\d+_(\\d+)\\|.*$
a1.sources.r1.interceptors.i1.serializers = s1
a1.sources.r1.interceptors.i1.serializers.s1.name = timestamp


# Configure the sinks
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://hadoop01:9000/flux/reportTime=%Y-%m-%d
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.timeZone = GMT+8


a1.sinks.k2.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k2.brokerList = hadoop01:9092,hadoop02:9092,hadoop03:9092
a1.sinks.k2.topic = netflow


# Configure the channels
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000
a1.channels.c2.transactionCapacity = 100


# Bindings
a1.sources.r1.channels = c1 c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2

With the default replicating channel selector, r1 copies every event into both c1 and c2, so the HDFS sink and the Kafka sink each receive the full stream.
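Anything that speaks Flume's Avro RPC protocol can feed the source above. Below is a minimal sender sketch using the Flume SDK, assuming the agent runs on hadoop01 and flume-ng-sdk is on the classpath; the class name FlumeAvroSender and the truncated record are made up for illustration.

import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;

import java.nio.charset.StandardCharsets;

// Hypothetical sender class for illustration; not part of the configs in this post.
public class FlumeAvroSender {
    public static void main(String[] args) throws EventDeliveryException {
        // Connect to the avro source a1.sources.r1 listening on 0.0.0.0:44444.
        RpcClient client = RpcClientFactory.getDefaultInstance("hadoop01", 44444);
        try {
            // One pipe-delimited log record, like the sample shown at the end of this post.
            String logLine = "http://127.0.0.1/demo/b.jsp|b.jsp|pageB|...";
            Event event = EventBuilder.withBody(logLine, StandardCharsets.UTF_8);
            client.append(event); // the event passes through the interceptor chain on arrival
        } finally {
            client.close();
        }
    }
}

The agent itself can be started with the standard launcher, for example: bin/flume-ng agent --conf conf --conf-file test.conf --name a1 -Dflume.root.logger=INFO,console (test.conf is a placeholder for whatever file holds the configuration above).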




# Client agent
# Configure the agent
a1.sources = r1
a1.sinks = k1 k2 k3
a1.channels = c1


# Configure the source
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 44444
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = regex_extractor
a1.sources.r1.interceptors.i1.regex = ^(?:[^\\|]*\\|){14}\\d+_\\d+_(\\d+)\\|.*$
a1.sources.r1.interceptors.i1.serializers = s1
a1.sources.r1.interceptors.i1.serializers.s1.name = timestamp


# Configure the sinks
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = Park01
a1.sinks.k1.port = 44444

a1.sinks.k2.type = avro
a1.sinks.k2.hostname = Park02
a1.sinks.k2.port = 44444

a1.sinks.k3.type = avro
a1.sinks.k3.hostname = Park03
a1.sinks.k3.port = 44444

a1.sinkgroups = g1
a1.sinkgroups.g1.sinks = k1 k2 k3
a1.sinkgroups.g1.processor.type = load_balance
a1.sinkgroups.g1.processor.backoff = true
a1.sinkgroups.g1.processor.selector = random

With processor.type = load_balance and the random selector, each batch of events is forwarded to just one of k1/k2/k3, and backoff = true temporarily blacklists a sink whose delivery fails, so traffic is spread across the central servers and routes around a downed one.

# Configure the channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bindings
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c1
a1.sinks.k3.channel = c1


# Central server agent
# Configure the agent
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1 c2

# Configure the source
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 44444


# Configure the sinks
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://ns/flux/reportTime=%Y-%m-%d
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.timeZone = GMT+8


a1.sinks.k2.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k2.brokerList = Park01:9092,Park02:9092,Park03:9092
a1.sinks.k2.topic = netflow


# Configure the channels
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000



a1.channels.c2.transactionCapacity = 100

# Bindings
a1.sources.r1.channels = c1 c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2

Parentheses in regular expressions play two main roles. First, they group part of the pattern so that an operator (such as the {14} quantifier) applies to the group as a whole; the non-capturing form (?:...) in the interceptor regex does exactly this to the repeated field-plus-delimiter pattern. Second, plain parentheses capture the matched text as a capture group, which is how the interceptor extracts the timestamp from each record. A sample log record:

http://127.0.0.1/demo/b.jsp|b.jsp|pageB|UTF-8|1024x768|24-bit|en-us|1|1|18.0 r0|0.7864694688469172|http://127.0.0.1/demo/a.jsp|Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36|84488155274825025429|8736267587_6_1495160530159|0:0:0:0:0:0:0:1

The regex skips the first 14 pipe-delimited fields with (?:[^\|]*\|){14}, matches the 15th field (8736267587_6_1495160530159) against \d+_\d+_(\d+), and captures its last component, 1495160530159, an epoch timestamp in milliseconds. The serializer writes it into the event header named timestamp, which is the header the HDFS sink reads to expand %Y-%m-%d in its output path.
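The pattern can be sanity-checked outside Flume with plain java.util.regex. The sketch below is only an illustration (the class name RegexCheck is made up, and the user-agent field is abbreviated, which does not change the field count):

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RegexCheck {
    public static void main(String[] args) {
        // Same pattern as a1.sources.r1.interceptors.i1.regex.
        Pattern p = Pattern.compile("^(?:[^\\|]*\\|){14}\\d+_\\d+_(\\d+)\\|.*$");
        String line = "http://127.0.0.1/demo/b.jsp|b.jsp|pageB|UTF-8|1024x768|24-bit|en-us|1|1|18.0 r0|"
                + "0.7864694688469172|http://127.0.0.1/demo/a.jsp|Mozilla/5.0 (...)|"
                + "84488155274825025429|8736267587_6_1495160530159|0:0:0:0:0:0:0:1";
        Matcher m = p.matcher(line);
        if (m.matches()) {
            long ts = Long.parseLong(m.group(1)); // 1495160530159
            SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
            fmt.setTimeZone(TimeZone.getTimeZone("GMT+8")); // matches a1.sinks.k1.hdfs.timeZone
            // Prints the partition the HDFS sink would write to: reportTime=2017-05-19
            System.out.println("reportTime=" + fmt.format(new Date(ts)));
        } else {
            System.out.println("no match: no timestamp header would be set");
        }
    }
}

If a record does not match, regex_extractor simply adds no header, and an HDFS sink whose path contains %Y-%m-%d then has nothing to expand it with (unless hdfs.useLocalTimeStamp is enabled), so it pays to test the pattern against real records like this.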