Hive UDF Development Example


1. Create a Java project

The corresponding pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.credithc</groupId>
    <artifactId>hive_udf_v1.0</artifactId>
    <version>1.0-SNAPSHOT</version>


    <!-- Set the version parameters according to the Hadoop and Hive versions you connect to -->
    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <maven-compiler-plugin.version>3.7.0</maven-compiler-plugin.version>

    </properties>

    <!-- Because CDH's Hadoop and Hive are used, the official CDH repository must be added so the corresponding dependencies can be downloaded -->
    <!-- If you use the Apache distributions of Hadoop and Hive, this repository is not needed -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>http://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>
    <dependencies>
        <!-- Add the dependencies; they are resolved according to the version parameters and the repository configured above -->
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-hadoop-mr</artifactId>
            <version>5.6.3</version>
        </dependency>

        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-hadoop-hive</artifactId>
            <version>5.6.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>2.0.0</version>
        </dependency>
        <!-- JUnit is the unit-testing framework for Java -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-resources-plugin</artifactId>
            <version>2.4.3</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>${maven-compiler-plugin.version}</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

2. Develop the UDF function:

package com.credithc.rc.kg.udf;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by glin on 2018/11/1.    com.credithc.rc.kg.udf.MessageDecodeUdf
 */
public class MessageDecodeUdf extends UDF{

    public MessageDecodeUdf(){
    }

    public String evaluate(String str, String params) {
        if (StringUtils.isEmpty(str) || StringUtils.isEmpty(params)) {
            return null;
        }
        String re = null;
        try {
            switch (params) {
                // extract a date string
                case "time":
                    re = parserTime(str);
                    break;
                // extract the bank name
                case "bankName":
                    re = parserBankName(str);
                    break;
            }
        } catch (Exception e) {
            // swallow the exception and return null so a bad row does not fail the whole query
        }
        return re;
    }

    public String parserTime(String str) {
        // date patterns: 2018年11月1日 / 11月1日 / 2018-11-01, 2018/11/01, 2018.11.01
        Pattern p0 = Pattern.compile("\\d{4}年\\d{1,2}月\\d{1,2}日|\\d{1,2}月\\d{1,2}日|\\d{4}[-/.]\\d{1,2}[-/.]\\d{1,2}");
        Matcher m0 = p0.matcher(str);
        if (m0.find()) {
            return m0.group(0);
        } else {
            return null;
        }
    }


    public String parserBankName(String str) {
        // extract the bank name inside [] brackets, e.g. [建设银行]
        Pattern p1 = Pattern.compile("\\[(.+?银行)\\]");
        Matcher m1 = p1.matcher(str);
        if (m1.find()) {
            return m1.group(1);
        } else {
            return null;
        }
    }




    // simple local smoke test
    public static void main(String[] args) {
        MessageDecodeUdf dd = new MessageDecodeUdf();
        System.out.println(dd.evaluate(" 。下载“中国建设银行”手机银行APP 。[建设银行]", "time"));
    }
}

Test run result: the sample string passed to main contains no date, so the "time" call prints null; passing "bankName" instead would print 建设银行.
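
Since JUnit is already declared as a test dependency in the pom, the extraction logic can also be covered by a unit test. The sketch below is not from the original post, and the sample SMS strings are made up for illustration.

package com.credithc.rc.kg.udf;

import org.junit.Assert;
import org.junit.Test;

// Minimal JUnit 4 test exercising both extraction branches of the UDF.
public class MessageDecodeUdfTest {

    private final MessageDecodeUdf udf = new MessageDecodeUdf();

    @Test
    public void extractsBankNameInsideBrackets() {
        Assert.assertEquals("建设银行", udf.evaluate("[建设银行]尾号1234的账户", "bankName"));
    }

    @Test
    public void extractsDateString() {
        Assert.assertEquals("2018年11月1日", udf.evaluate("2018年11月1日 消费100元", "time"));
    }

    @Test
    public void returnsNullForEmptyInput() {
        Assert.assertNull(udf.evaluate("", "time"));
    }
}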

3. Export the jar:
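
With the Maven configuration above, the jar can be built from the project root with a standard Maven invocation (not shown in the original post):

mvn clean package

The packaged jar should appear under target/ as hive_udf_v1.0-1.0-SNAPSHOT.jar, matching the artifactId and version in the pom and the file name used in the upload step below.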

4. Upload to Hive and test:

In the IDE, find the class, right-click it and choose Copy Reference to get its fully qualified name: com.credithc.rc.kg.udf.MessageDecodeUdf

-- upload the jar to HDFS and add it to the session classpath

hdfs dfs -put /home/sd/test/hive_udf_v1.0-1.0-SNAPSHOT.jar   /user/sd/hive_udf/
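
The original post does not show the command that actually adds the uploaded jar to Hive's classpath. Assuming the HDFS path used above and a Hive version that accepts HDFS paths for resources, it would typically be:

add jar hdfs:///user/sd/hive_udf/hive_udf_v1.0-1.0-SNAPSHOT.jar; -- add the uploaded jar to the current session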

list jars; -- check which jars have been added

create temporary function message_udf as 'com.credithc.rc.kg.udf.MessageDecodeUdf'; -- create a temporary function bound to the UDF class in the jar

Usage test:

select message_udf(str, params) from kkkk;
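
As a quick sanity check with literal arguments (the SMS text here is a made-up example), the regexes above would yield:

select message_udf('2018年11月1日 [建设银行]', 'time');      -- 2018年11月1日
select message_udf('2018年11月1日 [建设银行]', 'bankName');  -- 建设银行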
