版权声明:本文为博主原创文章,未经博主允许不得转载。博客地址:http://www.fanlegefan.com/ https://blog.csdn.net/woloqun/article/details/86471719
nginx日志样例
192.168.1.111 - - [01/Dec/2018:00:00:02 +0800] "GET /?from=iOS_TestController&is_login=1&oid=aaaa1111&type=answer&uid=123456 HTTP/1.1" 200 0-0.000-- "-" "HaoHaoZhu/3.11.0 (iPhone; iOS 11.2.1; Scale/3.00)-sdfgdsg-fgggggggg-hzhy-eshisdff-vid_ec69906428031980aff5f89d11cb7d16-uid504vvvvv-fffff_tokenffffff1e8eb23169b789561-piPhone10_3-k3vo9" 192.168.1.122 - pcc.saa.ccc.cn
使用presto 的正则函数去解析这个字符串当然是可以的,但是容易出错,而且代码维护难度高,所以自己开发了一个nginx日志解析的函数:
nginx(varchar)
function type:scalar
return type:array(varchar)
使用方式
select try(nginx(line)) from table limit 10
注意:nginx这个函数最好和try一起使用,避免有些日志解析不了报错
具体代码如下。首先是 pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the presto-hhz-plugin UDF bundle.
  The "presto-plugin" packaging is supplied by presto-maven-plugin, which is
  registered below with <extensions>true</extensions>.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>groupId</groupId>
    <artifactId>presto-hhz-plugin</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>presto-plugin</packaging>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
        <!--
          java.version alone is not read by maven-compiler-plugin; these two
          user properties are what actually select the javac source/target level.
        -->
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
        <presto.plugin.version>0.3</presto.plugin.version>
    </properties>

    <dependencies>
        <!-- "provided": the SPI is compiled against but not bundled into the plugin. -->
        <dependency>
            <groupId>com.facebook.presto</groupId>
            <artifactId>presto-spi</artifactId>
            <version>0.215</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Must be loaded as an extension so the custom packaging type is recognised. -->
            <plugin>
                <groupId>com.facebook.presto</groupId>
                <artifactId>presto-maven-plugin</artifactId>
                <version>${presto.plugin.version}</version>
                <extensions>true</extensions>
            </plugin>
        </plugins>
    </build>
</project>
函数解析主体
package com.hhz.presto.pugins.impls;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.function.ScalarFunction;
import com.facebook.presto.spi.function.SqlNullable;
import com.facebook.presto.spi.function.SqlType;
import com.facebook.presto.spi.type.StandardTypes;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.facebook.presto.spi.type.VarcharType.VARCHAR;
/**
 * Presto scalar function {@code nginx(varchar) -> array(varchar)} that splits one
 * nginx access-log line into the 12 capture groups of {@link #regex}.
 *
 * <p>Groups are returned in order. The bracketed group keeps its surrounding
 * {@code [ ]} and the three quoted groups keep their surrounding double quotes,
 * because those characters are inside the capture parentheses.
 */
public class NginxLogFunction {
    /**
     * Expression describing one log line: twelve space-separated capture groups
     * (three plain tokens, one bracketed section, one quoted section, a status that
     * is either {@code -} or digits, one plain token, two quoted sections and three
     * trailing plain tokens).
     */
    static final String regex = "([^ ]*) ([^ ]*) ([^ ]*) (\\[.*\\]) (\\\".*?\\\") (-|[0-9]*) ([^ ]*) (\\\".*?\\\") (\\\".*?\\\") ([^ ]*) ([^ ]*) ([^ ]*)";

    /** Compiled once; {@link Pattern} is immutable and safe to share across calls. */
    private static final Pattern PATTERN = Pattern.compile(regex);

    /**
     * Extracts the capture groups of the first match of {@link #regex} in {@code source}.
     *
     * @param source a raw log line; may be {@code null}
     * @return the 12 captured fields in group order, or an empty list when
     *         {@code source} is {@code null} or contains no match
     */
    public static ArrayList<String> getMatcher(String source) {
        ArrayList<String> fields = new ArrayList<String>();
        if (source == null) {
            return fields;
        }
        Matcher m = PATTERN.matcher(source);
        // Check the result of find() explicitly instead of relying on the
        // IllegalStateException that group() throws when nothing matched.
        if (!m.find()) {
            return fields;
        }
        for (int group = 1; group <= m.groupCount(); group++) {
            fields.add(m.group(group));
        }
        return fields;
    }

    /**
     * SQL entry point: {@code nginx(line)}.
     *
     * @param string the log line; SQL NULL arrives as {@code null}
     * @return a VARCHAR block holding the captured fields, or {@code null} (SQL NULL)
     *         when the input is NULL or does not match {@link #regex}
     */
    @ScalarFunction("nginx")
    @SqlNullable // required on the method because it can return NULL
    @SqlType("array(varchar)")
    public static Block nginx(@SqlNullable @SqlType(StandardTypes.VARCHAR) Slice string)
    {
        if (string == null) {
            return null;
        }
        ArrayList<String> fields = getMatcher(string.toStringUtf8());
        if (fields.isEmpty()) {
            return null;
        }
        // Second argument is the expected number of entries (a sizing hint only).
        BlockBuilder parts = VARCHAR.createBlockBuilder(null, fields.size());
        for (String field : fields) {
            VARCHAR.writeSlice(parts, Slices.utf8Slice(field));
        }
        return parts.build();
    }
}
函数注册
package com.hhz.presto.pugins;
import com.facebook.presto.spi.Plugin;
import com.hhz.presto.pugins.impls.ConvFunction;
import com.hhz.presto.pugins.impls.NginxLogFunction;
import java.util.HashSet;
import java.util.Set;
public class HHZFunctionsPlugin
implements Plugin
{
public Set<Class<?>> getFunctions() {
return new HashSet<Class<?>>() {
{
add(ConvFunction.class);
add(NginxLogFunction.class);
}
};
}
}
打包编译
mvn clean package
将编译好的jar放到presto集群所有节点的$PRESTO_HOME/plugin/udfs/目录下(plugin是presto默认的插件目录;udfs这个子目录需要自己创建,名字任意),然后重启presto集群