Flume custom interceptor desensitizes data

Flume custom interceptor desensitizes data

foreword

Use the Flume custom interceptor to perform MD5 encryption processing on more sensitive data. In this example, the user password in the following data needs to be desensitized, and the gender field data needs to be filtered.

# 数据样例
# 用户名 密码 年龄 性别 身高 体重
  admin 123456 26170 70 

1. Write java program

  1. maven dependencies
<dependencies>
    <dependency>
        <groupId>org.apache.flume</groupId>
        <artifactId>flume-ng-sdk</artifactId>
        <version>1.8.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flume</groupId>
        <artifactId>flume-ng-core</artifactId>
        <version>1.8.0</version>
    </dependency>
</dependencies>
  1. custom interceptor

    • MyInterceptor class
public class MyInterceptor implements Interceptor {
    // 原始数据字段的分隔符
    private String fields_separator;
    // 需要保留字段索引,形式如:1,2,3,4
    private String indexs;
    // indexs的分隔符
    private String indexs_sparator;
    // 需要加密字段索引
    private String filed_encrypted_index;

    public MyInterceptor(String fields_separator, String indexs, String indexs_sparator, String filed_encrypted_index) {
        this.fields_separator = fields_separator;
        this.indexs = indexs;
        this.indexs_sparator = indexs_sparator;
        this.filed_encrypted_index = filed_encrypted_index;
    }

    public void initialize() {
    }

    public Event intercept(Event event) {
        if (event == null) return null;
        // 通过获取event数据,转化成字符串
        String line = new String(event.getBody(), Charsets.UTF_8);
        // 通过分隔符分割数据
        String[] split = line.split(fields_separator);
        //获取需要保留的字段索引
        String[] indexs_data = indexs.split(indexs_sparator);
        // 需要加密的索引
        int encrypted_index = Integer.parseInt(filed_encrypted_index);

        String newLine = "";
        for (int i = 0; i < indexs_data.length; i++) {
            int index = Integer.parseInt(indexs_data[i]);
            String filed = split[index];
            if (index == encrypted_index) {// 字段需要加密处理
                //通过MD5加密
                filed = StringUtils.GetMD5Code(filed);
            }
            // 拼接字符串
            newLine += filed;
            //字段之间添加分隔符,舍弃最后一个
            if (i < indexs_data.length - 1) {
                newLine += fields_separator;
            }
        }
        event.setBody(newLine.getBytes(Charsets.UTF_8));
        return event;
    }

    public List<Event> intercept(List<Event> list) {
        if (list == null) return null;
        List<Event> events = new ArrayList<Event>();
        for (Event event : list) {
            Event intercept = intercept(event);
            events.add(intercept);
        }
        return events;
    }

    public void close() {

    }

    public static class Builder implements Interceptor.Builder {

        // 原始数据字段的分隔符
        private String fields_separator;
        // 需要保留字段索引,形式如:1,2,3,4
        private String indexs;
        // indexs的分隔符
        private String indexs_sparator;
        // 需要加密字段索引
        private String filed_encrypted_index;

        public Interceptor build() {
            return new MyInterceptor(fields_separator, indexs, indexs_sparator, filed_encrypted_index);
        }

        public void configure(Context context) {
            fields_separator = context.getString(Constant.FIELDS_SEPARATOR, Constant.DEFAULT_FIELD_SEPARATOR);
            indexs = context.getString(Constant.INDEXS, Constant.DEFAULT_INDEXS);
            indexs_sparator = context.getString(Constant.INDEXS_SEPARATOR, Constant.DEFAULT_INDEXS_SEPARATOR);
            filed_encrypted_index = context.getString(Constant.FIELD_ENCRYPTED_INDEX, Constant.DEFAULT_FIELD_ENCRYPTED_INDEX);
        }
    }
}
  • StringUtils class
public class StringUtils {
    // 全局数组
    private final static String[] strDigits = {"0", "1", "2", "3", "4", "5",
            "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};

    // 返回形式为数字跟字符串
    private static String byteToArrayString(byte bByte) {
        int iRet = bByte;
        if (iRet < 0) {
            iRet += 256;
        }
        int iD1 = iRet / 16;
        int iD2 = iRet % 16;
        return strDigits[iD1] + strDigits[iD2];
    }

    // 返回形式只为数字
    private static String byteToNum(byte bByte) {
        int iRet = bByte;
        System.out.println("iRet1=" + iRet);
        if (iRet < 0) {
            iRet += 256;
        }
        return String.valueOf(iRet);
    }

    // 转换字节数组为16进制字串
    private static String byteToString(byte[] bByte) {
        StringBuilder sb = new StringBuilder();
        for (byte b : bByte) {
            sb.append(byteToArrayString(b));
        }
        return sb.toString();
    }

    public static String GetMD5Code(String strObj) {
        String resultString = null;
        try {
            MessageDigest md = MessageDigest.getInstance("MD5");
            // md.digest() 该函数返回值为存放哈希值结果的byte数组
            resultString = byteToString(md.digest(strObj.getBytes()));
        } catch (NoSuchAlgorithmException ex) {
            ex.printStackTrace();
        }
        return resultString == null ? strObj : resultString;
    }
}
  • Constants class: defines constants, corresponding to fields in the configuration file
public class Constant {
    // 字段分隔符
    public static final String FIELDS_SEPARATOR = "fields_separator";
    // 默认字段分隔符
    public static final String DEFAULT_FIELD_SEPARATOR = " ";
    // indexs
    public static final String INDEXS = "indexs";
    // 默认indexs为0
    public static final String DEFAULT_INDEXS = "0";
    // indexs的分隔符
    public static final String INDEXS_SEPARATOR = "indexs_separator";
    // 默认indexs分隔符
    public static final String DEFAULT_INDEXS_SEPARATOR = ",";
    // 加密字段索引
    public static final String FIELD_ENCRYPTED_INDEX = "filed_encrypted_index";
    // 默认加密字段索引,为空
    public static final String DEFAULT_FIELD_ENCRYPTED_INDEX = "";
}
  1. Typed into a jar package

as the picture shows:

image

image

Then select Build->Build Artfacts, a pop-up window appears, click Build to generate a jar in the out directory.

  1. upload to flume/libdirectory
scp Flume-Custom.jar hadoop@server01:/hadoop/flume/lib

image

Second, write the configuration file

Configuration file name: flume-custom-en.conf, put it in the flum/conf directory

a1.sources = r1
a1.sinks = s1
a1.channels = c1

#source
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /home/hadoop/data
a1.sources.r1.batchSize= 50
# 设置字符集UTF-8
a1.sources.r1.inputCharset = UTF-8

a1.sources.r1.interceptors =i1 i2
# 自定义拦截器
a1.sources.r1.interceptors.i1.type =com.example.interceptor.MyInterceptor$Builder
# 字段分隔符
a1.sources.r1.interceptors.i1.fields_separator=,
# 需要保留的字段
a1.sources.r1.interceptors.i1.indexs =0,1,2,4,5
# indexs的分隔符
a1.sources.r1.interceptors.i1.indexs_separator=,
# 需要加密处理的字段索引
a1.sources.r1.interceptors.i1.filed_encrypted_index =0

a1.sources.r1.interceptors.i2.type = timestamp


#sink
a1.sinks.s1.type = hdfs
a1.sinks.s1.hdfs.path =hdfs://192.168.100.11:9000/flume-log/en/%Y%m%d
a1.sinks.s1.hdfs.filePrefix = event
a1.sinks.s1.hdfs.fileSuffix = .log
a1.sinks.s1.hdfs.rollSize = 10485760
a1.sinks.s1.hdfs.rollInterval =20
a1.sinks.s1.hdfs.rollCount = 0
a1.sinks.s1.hdfs.batchSize = 1500
a1.sinks.s1.hdfs.round = true
a1.sinks.s1.hdfs.roundUnit = minute
a1.sinks.s1.hdfs.threadsPoolSize = 25
a1.sinks.s1.hdfs.useLocalTimeStamp = true
a1.sinks.s1.hdfs.minBlockReplicas = 1
a1.sinks.s1.hdfs.fileType =DataStream
a1.sinks.s1.hdfs.writeFormat = Text
a1.sinks.s1.hdfs.callTimeout = 60000
a1.sinks.s1.hdfs.idleTimeout =60

#channel
a1.channels.c1.type = memory
a1.channels.c1.capacity=100000
a1.channels.c1.transactionCapacity=50000


# 设置r1 s1 c1之间的关系
a1.sources.r1.channels = c1
a1.sinks.s1.channel = c1

3. Execution

  1. Put the data to be desensitized into the /home/hadoop/data directory.

  2. The content of the test file is as follows

admin,123456,26,男,170,70
root,1234567,23,男,168,68
zhang,123456,22,女,165,51
hadoop,hadoop,25,男,173,71
  1. Start the flume service
bin/flume-ng agent -c conf -f conf/flume-custom-en.conf -name a1 -Dflume.root.logger=INFO,console
  1. View results in hdfs

image

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325805855&siteId=291194637