presto自定义函数解析nginx log

版权声明:本文为博主原创文章,未经博主允许不得转载。博客地址:http://www.fanlegefan.com/ https://blog.csdn.net/woloqun/article/details/86471719

nginx日志样例

192.168.1.111 - - [01/Dec/2018:00:00:02 +0800] "GET /?from=iOS_TestController&is_login=1&oid=aaaa1111&type=answer&uid=123456 HTTP/1.1" 200 0-0.000-- "-" "HaoHaoZhu/3.11.0 (iPhone; iOS 11.2.1; Scale/3.00)-sdfgdsg-fgggggggg-hzhy-eshisdff-vid_ec69906428031980aff5f89d11cb7d16-uid504vvvvv-fffff_tokenffffff1e8eb23169b789561-piPhone10_3-k3vo9" 192.168.1.122 - pcc.saa.ccc.cn

使用presto 的正则函数去解析这个字符串当然是可以的,但是容易出错,而且代码维护难度高,所以自己开发了一个nginx日志解析的函数:

nginx(varchar)
function type:scalar
return type:array(varchar)

使用方式

select try(nginx(line)) from table limit 10

注意:nginx这个函数最好和try一起使用,避免有些日志解析不了报错
具体代码如下。pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the presto-hhz-plugin artifact (Presto plugin with custom functions). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>groupId</groupId>
    <artifactId>presto-hhz-plugin</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Non-standard packaging type; presumably supplied by presto-maven-plugin, which is
         registered as a build extension in the <build> section below. -->
    <packaging>presto-plugin</packaging>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <!-- NOTE(review): java.version is not referenced anywhere else in this POM; confirm
             whether anything actually consumes it. -->
        <java.version>1.8</java.version>
        <!-- Version of presto-maven-plugin, referenced in the <build> section. -->
        <presto.plugin.version>0.3</presto.plugin.version>
    </properties>

    <dependencies>
        <!-- Presto SPI: "provided" scope, so it is on the compile classpath but is not
             bundled into the packaged plugin. -->
        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-spi</artifactId>
            <version>0.215</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- extensions=true lets this plugin contribute to the build lifecycle, which is
                 needed for a custom <packaging> value to be recognised. -->
            <plugin>
                <groupId>com.facebook.presto</groupId>
                <artifactId>presto-maven-plugin</artifactId>
                <version>${presto.plugin.version}</version>
                <extensions>true</extensions>
            </plugin>
        </plugins>
    </build>
</project>

函数解析主体

package com.hhz.presto.pugins.impls;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlNullable;
import com.facebook.presto.spi.function.SqlType;
import com.facebook.presto.spi.type.StandardTypes;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.facebook.presto.spi.type.VarcharType.VARCHAR;
/**
 * Presto scalar function {@code nginx(varchar) -> array(varchar)} that splits one nginx
 * access-log line into the capture groups of {@link #regex}.
 */
public class NginxLogFunction {

    /**
     * Log-line pattern with twelve capture groups separated by single spaces. The 4th group
     * is a bracketed section, the 5th, 8th and 9th are double-quoted sections (quotes are
     * kept in the captured text), the 6th is {@code -} or digits, and the remaining groups
     * are runs of non-space characters.
     */
    static final String regex = "([^ ]*) ([^ ]*) ([^ ]*) (\\[.*\\]) (\\\".*?\\\") (-|[0-9]*) ([^ ]*) (\\\".*?\\\") (\\\".*?\\\") ([^ ]*) ([^ ]*) ([^ ]*)";

    /** Compiled once and shared; {@link Pattern} instances are immutable and thread-safe. */
    private static final Pattern PATTERN = Pattern.compile(regex);

    /**
     * Extracts the capture groups of the first occurrence of {@link #regex} in {@code source}.
     *
     * @param source raw log line; may be {@code null}
     * @return the captured groups in pattern order, or an empty list when {@code source} is
     *         {@code null} or contains no match
     */
    public static ArrayList<String> getMatcher( String source) {
        if (source == null) {
            return new ArrayList<>();
        }

        Matcher m = PATTERN.matcher(source);
        // find() (not matches()): the pattern is unanchored, so a match anywhere in the line counts.
        if (!m.find()) {
            return new ArrayList<>();
        }

        int groups = m.groupCount();
        ArrayList<String> fields = new ArrayList<>(groups);
        for (int i = 1; i <= groups; i++) {
            fields.add(m.group(i));
        }
        return fields;
    }


    /**
     * SQL entry point for {@code nginx(line)}.
     *
     * @param string the log line; SQL NULL arrives as {@code null}
     * @return a VARCHAR block holding the captured fields, or {@code null} (SQL NULL) when the
     *         input is NULL or the line does not match the pattern
     */
    @ScalarFunction("nginx")
    @SqlNullable // required because this method may return null (SQL NULL)
    @SqlType("array(varchar)")
    public static Block nginx(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice string)
    {
        if (string == null) {
            return null;
        }

        ArrayList<String> list = getMatcher(string.toStringUtf8());
        if (list.isEmpty()) {
            return null;
        }

        BlockBuilder parts = VARCHAR.createBlockBuilder(null, list.size());
        for (String field : list) {
            VARCHAR.writeSlice(parts, Slices.utf8Slice(field));
        }
        return parts.build();
    }
}

函数注册

package com.hhz.presto.pugins;

import com.facebook.presto.spi.Plugin;
import com.hhz.presto.pugins.impls.ConvFunction;
import com.hhz.presto.pugins.impls.NginxLogFunction;
import java.util.HashSet;
import java.util.Set;

/**
 * Presto plugin entry point that exposes this project's scalar function classes.
 */
public class HHZFunctionsPlugin
        implements Plugin
{
    /**
     * Lists the classes whose annotated functions should be registered.
     *
     * @return a new mutable set containing {@code ConvFunction} and {@code NginxLogFunction}
     */
    public Set<Class<?>> getFunctions() {
        // Plain HashSet instead of double-brace initialization, which would create an
        // anonymous HashSet subclass just to add two elements.
        Set<Class<?>> functions = new HashSet<>();
        functions.add(ConvFunction.class);
        functions.add(NginxLogFunction.class);
        return functions;
    }
}

打包编译

mvn clean package

将编译好的jar放到presto集群所有节点的$PRESTO_HOME/plugin/udfs/目录下(udfs这个目录需要自己创建,名字任意),然后重启presto集群

猜你喜欢

转载自blog.csdn.net/woloqun/article/details/86471719