大数据-Flume拦截器

Flume拦截器


当Source读取events发送到Sink的时候,在events header中加入一些有用的信息,或者对events的内容进行过滤,完成初步的数据清洗。

Flume自带有六种拦截器,分别为:时间拦截器、主机拦截器、UUID拦截器、查询拦截器、正则过滤拦截器、正则抽取拦截器。

时间拦截器


# Agent a1: spooling-directory source -> memory channel -> HDFS sink.
a1.sources=r1
a1.sinks=k1
a1.channels=c1

# define sources
a1.sources.r1.type=spooldir
a1.sources.r1.spoolDir=/usr/local/chinatelecom

# define interceptors
# The timestamp interceptor adds a "timestamp" header (processing time in
# milliseconds) to each event; the HDFS sink uses it to resolve %H in hdfs.path.
# The class name is case-sensitive: TimestampInterceptor, not TimeStampInterceptor.
a1.sources.r1.interceptors=destiny
a1.sources.r1.interceptors.destiny.type=org.apache.flume.interceptor.TimestampInterceptor$Builder

# define channels
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100

# define sinks
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://hadoop1:9000/flume-interceptor/%H
a1.sinks.k1.hdfs.filePrefix=event-
a1.sinks.k1.hdfs.fileType=DataStream
# roll by size (128 MB) or every 60 s; rollCount=0 disables rolling by event count
a1.sinks.k1.hdfs.rollCount=0
a1.sinks.k1.hdfs.rollSize=134217728
a1.sinks.k1.hdfs.rollInterval=60

# bind source and sink to the channel
a1.sinks.k1.channel=c1
a1.sources.r1.channels=c1

主机拦截器


# Agent a1: exec source -> memory channel -> HDFS sink; each event is tagged
# with the host the agent runs on.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# define sources
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /opt/Destiny

# define interceptors
# All options must be set on the interceptor declared here ("destiny").
a1.sources.r1.interceptors=destiny
a1.sources.r1.interceptors.destiny.type=host
# useIP: false -> hostname, true -> IP address
a1.sources.r1.interceptors.destiny.useIP=false
# header that receives the host value; used below as %{agentHost}
a1.sources.r1.interceptors.destiny.hostHeader=agentHost

# define sinks
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://hadoop1:9000/flumehost/%H
a1.sinks.k1.hdfs.filePrefix=Andy_%{agentHost}
a1.sinks.k1.hdfs.fileSuffix=.log
a1.sinks.k1.hdfs.fileType=DataStream
a1.sinks.k1.hdfs.writeFormat=Text
a1.sinks.k1.hdfs.rollInterval=10
# no timestamp interceptor here, so %H is resolved from the sink's local clock
a1.sinks.k1.hdfs.useLocalTimeStamp=true

# define channels
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100

# bind source and sink to the channel
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

UUID拦截器


# Agent a1: exec source -> memory channel -> logger sink; each event gets a
# generated UUID header.
a1.sources=r1
a1.sinks=k1
a1.channels=c1

# define sources
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /usr/Local/UUID

# define interceptors
# All options must be set on the interceptor declared here ("destiny").
a1.sources.r1.interceptors=destiny
a1.sources.r1.interceptors.destiny.type=org.apache.flume.sink.solr.morphline.UUIDInterceptor$Builder
# keep a UUID header that is already present instead of overwriting it
a1.sources.r1.interceptors.destiny.preserveExisting=true
# constant string prepended to every generated UUID
a1.sources.r1.interceptors.destiny.prefix=UUID_

# define sinks
a1.sinks.k1.type=logger

# define channels
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100

# bind source and sink to the channel
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

查询拦截器


# Agent a1: exec source -> memory channel -> logger sink; every run of digits
# in the event body is replaced with the string "destiny".
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# define sources
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /usr/local/select

# define interceptors
a1.sources.r1.interceptors=destiny
a1.sources.r1.interceptors.destiny.type=search_replace
# The file is parsed as Java properties, where a single backslash is an escape
# character: "\d+" would be read as "d+". Double it to get the regex \d+.
a1.sources.r1.interceptors.destiny.searchPattern=\\d+
a1.sources.r1.interceptors.destiny.replaceString=destiny
a1.sources.r1.interceptors.destiny.charset=UTF-8

# define sinks
a1.sinks.k1.type=logger

# define channels
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100

# bind source and sink to the channel
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

正则过滤拦截器


# Agent a1: exec source -> memory channel -> logger sink; events are filtered
# by matching the body against a regular expression.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# define sources
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /usr/local/select

# define interceptors
a1.sources.r1.interceptors=destiny
a1.sources.r1.interceptors.destiny.type=regex_filter
a1.sources.r1.interceptors.destiny.regex=^A.*
# excludeEvents (note the plural):
#   true  -> drop events that start with "A"
#   false -> drop events that do NOT start with "A"
a1.sources.r1.interceptors.destiny.excludeEvents=true

# define sinks
a1.sinks.k1.type=logger

# define channels
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100

# bind source and sink to the channel
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

正则抽取拦截器


# Agent a1: exec source -> memory channel -> logger sink; regex capture groups
# from the event body are copied into event headers.
a1.sources=r1
a1.channels=c1
a1.sinks=k1

# define sources
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F /usr/local/select

# define interceptors
a1.sources.r1.interceptors=destiny
a1.sources.r1.interceptors.destiny.type=regex_extractor
a1.sources.r1.interceptors.destiny.regex=hostname is (.*?) ip is(.*)
# One serializer per capture group, in order: group 1 -> header "cookieid",
# group 2 -> header "ip". The key must be spelt "serializers" on every line.
a1.sources.r1.interceptors.destiny.serializers=s1 s2
a1.sources.r1.interceptors.destiny.serializers.s1.name=cookieid
a1.sources.r1.interceptors.destiny.serializers.s2.name=ip

# define sinks
a1.sinks.k1.type=logger

# define channels
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100

# bind source and sink to the channel
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

自定义拦截器


<!-- https://mvnrepository.com/artifact/org.apache.flume/flume-ng-core -->
<!-- Maven dependency for building the custom interceptor: it provides the
     org.apache.flume Context, Event and Interceptor types imported by the class. -->
 <dependency>
     <groupId>org.apache.flume</groupId>
     <artifactId>flume-ng-core</artifactId>
     <version>1.8.0</version>
 </dependency>
package com.maven.flume;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import java.util.ArrayList;
import java.util.List;

/**
 * @author Administrator
 */
public class FlumeInterceptor implements Interceptor {
    @Override
    public void initialize() {

    }

    @Override
    public Event intercept(Event event) {
        byte[] body = event.getBody();
        event.setBody(new StringBuilder().append(new String(body)).reverse().toString().getBytes());
        return null;
    }

    @Override
    public List<Event> intercept(List<Event> eventList) {
        ArrayList<Event> list = new ArrayList<>();
        for (Event event: eventList){
            list.add(intercept(event));
        }
        return list;
    }

    @Override
    public void close() {

    }

    private static class FlumeBuilder implements Builder{

        @Override
        public Interceptor build() {
            return new FlumeInterceptor();
        }

        @Override
        public void configure(Context context) {

        }
    }
}
# Agent a1: exec source -> custom interceptor -> memory channel -> HDFS sink.
a1.sources=r1
a1.sinks=k1
a1.channels=c1

# define sources
# "-c +0" makes tail output the file from its first byte, then follow it
a1.sources.r1.type=exec
a1.sources.r1.command=tail -F -c +0 /usr/local/destiny.csv
a1.sources.r1.shell=/bin/bash -c

# define interceptors
a1.sources.r1.interceptors=destiny
a1.sources.r1.interceptors.destiny.type=com.maven.flume.FlumeInterceptor$Builder

# define sinks
a1.sinks.k1.type=hdfs
a1.sinks.k1.hdfs.path=hdfs://hadoop1:9000/destiny/%H
a1.sinks.k1.hdfs.fileType=DataStream
a1.sinks.k1.hdfs.filePrefix=Destiny-
# round the timestamp used in the path down to a 1-hour boundary;
# roundValue/roundUnit only take effect when hdfs.round is true
a1.sinks.k1.hdfs.round=true
a1.sinks.k1.hdfs.roundValue=1
a1.sinks.k1.hdfs.roundUnit=hour
# roll by size (128 MB) or every 60 s; rollCount=0 disables rolling by event count
a1.sinks.k1.hdfs.rollCount=0
a1.sinks.k1.hdfs.rollSize=134217728
a1.sinks.k1.hdfs.rollInterval=60
a1.sinks.k1.hdfs.useLocalTimeStamp=true

# define channels
a1.channels.c1.type=memory
a1.channels.c1.capacity=10000
a1.channels.c1.transactionCapacity=100

# bind source and sink to the channel (the channel is c1; k1 is the sink)
a1.sources.r1.channels=c1
a1.sinks.k1.channel=c1

Flume启动命令


flume-ng agent -n a1 --conf ./conf --conf-file ./conf/xx.conf -Dflume.root.logger=INFO,console
发布了131 篇原创文章 · 获赞 12 · 访问量 6万+

猜你喜欢

转载自blog.csdn.net/JavaDestiny/article/details/100999290