大数据之hive(四) --- function ,UDF 用户自定义函数 ,GenericUDF ,自定义NVL函数,自定义日期函数

一、function 函数
--------------------------------------------------------------------
    1.显示所有函数
        $hive> show functions;

    2.查看函数的帮助
        $hive> desc function cos;

    3.表生成函数
        $hive> explode(split(str, exp))   //先按照分隔符exp切分str得到数组,再将数组炸开成一条条记录(explode的参数是数组或map,不是字符串)


二、UDF 用户自定义函数
-------------------------------------------------------------
    1.创建类,继承UDF
    
/**
     * Demo user-defined function: integer addition.
     * Hive resolves the overload by argument count (2 or 3 ints).
     */

    // Help text shown by `DESC FUNCTION [EXTENDED] myadd`.
    @Description(name = "myadd",
            value = "_add_(int a ,int b ) --> return a + b ",
            extended = "Example:\n"
                    + " > add(1,1) ==> 2 ;\n"
                    + " > add(1,1,1) ==> 3")

    public class TsUDF extends UDF {

        /**
         * Adds two ints.
         *
         * @return a + b
         */
        public int evaluate(int a, int b) {
            return a + b;
        }

        /**
         * Adds three ints.
         *
         * @return a + b + c
         */
        public int evaluate(int a, int b, int c) {
            return a + b + c;
        }
    }

    2.打jar包
        jar cvf my.jar -C target/classes .

    3.添加jar包到hive的类路径(或者拷贝到hive/lib下)
        $hive> ADD JAR /share/my.jar;

    4.创建临时函数
        $hive> CREATE TEMPORARY FUNCTION myadd AS 'ts.demo.hive.TsUDF';

    5.在查询中使用自定义函数
        $hive> select myadd(1,2);


三、GenericUDF 通用的UDF -- 定义NVL函数
-------------------------------------------------------------
    1.自定义NVL函数

    
package ts.demo.hive;
    import org.apache.hadoop.hive.ql.exec.Description;
    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
    import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;


    @Description(name = "nvl",
            value = "_FUNC_(value,default_value) - Returns default value if value"
                    +" is null else returns value",
            extended = "Example:\n"
                    + " > SELECT _FUNC_(null,'bla') FROM src LIMIT 1;\n")

    /**
     * NVL (null-value) function: returns the first argument when it is
     * non-null, otherwise the second argument. Both arguments must resolve
     * to a common type.
     */
    public class GenericUDFNvl extends GenericUDF {

        // Resolves the common return type of the two arguments and converts
        // values to that type when necessary.
        private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
        // Raw argument inspectors, captured in initialize() for use in evaluate().
        private ObjectInspector[] argumentOIs;

        /**
         * Validates the argument list (exactly two arguments with a common
         * type) and returns the resolved ObjectInspector for the result.
         *
         * @throws UDFArgumentException if the arity or the types are wrong
         */
        @Override
        public ObjectInspector initialize(ObjectInspector[] arguments)
                throws UDFArgumentException {
            argumentOIs = arguments;
            // Arity check: NVL takes exactly two arguments.
            if (arguments.length != 2) {
                throw new UDFArgumentLengthException(
                        "The operator 'NVL' accepts 2 arguments.");
            }
            returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
            // Type-compatibility check: both arguments must resolve to one type.
            if (!(returnOIResolver.update(arguments[0]) && returnOIResolver
                    .update(arguments[1]))) {
                // Argument ids are 0-based; the original passed the out-of-range
                // index 2 here and misspelled NVL as "NLV" in the message.
                throw new UDFArgumentTypeException(1,
                        "The 1st and 2nd args of function NVL should have the same type, "
                                + "but they are different: \"" + arguments[0].getTypeName()
                                + "\" and \"" + arguments[1].getTypeName() + "\"");
            }
            return returnOIResolver.get();
        }

        /**
         * Returns the first argument when non-null, otherwise the second,
         * converting either to the common type resolved in initialize().
         */
        @Override
        public Object evaluate(DeferredObject[] arguments) throws HiveException {
            Object retVal = returnOIResolver.convertIfNecessary(arguments[0].get(),
                    argumentOIs[0]);
            if (retVal == null) {
                retVal = returnOIResolver.convertIfNecessary(arguments[1].get(),
                        argumentOIs[1]);
            }
            return retVal;
        }

        // Shown in EXPLAIN output and error messages.
        // (Original concatenation produced "returnsX" with no spacing.)
        @Override
        public String getDisplayString(String[] children) {
            return "if " + children[0] + " is null returns " + children[1];
        }
    }


    2.打jar包并添加到hive类路径
        $hive> ADD JAR /path/to/jar.jar;

    3.创建临时函数
        $hive> CREATE TEMPORARY FUNCTION nvl AS 'ts.demo.hive.GenericUDFNvl';

    4.使用
        $hive> select nvl( NULL, 5 );
        $hive> select nvl( 1, 5 );


四、GenericUDF 通用的UDF -- 自定义日期函数
----------------------------------------------------------------
    1.将日期转成字符串进行输出

   
 @Description(name = "ToChar",
            value = "使用方式如下:toChar()",
            extended = "toChar_xxxx-ext")
    public class ToCharUDF extends GenericUDF {

        public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
            return null;
        }

        public Object evaluate(DeferredObject[] args) throws HiveException {
            //有参数
            if(args != null && args.length != 0){
                //指定日志对象的格式化串
                if(args.length == 1){
                    SimpleDateFormat sdf = new SimpleDateFormat();
                    sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
                    return sdf.format((Date)(args[0].get()));
                }
                //两个参数,Date date,String frt
                else{
                    SimpleDateFormat sdf = new SimpleDateFormat();
                    sdf.applyPattern((String)args[1].get());
                    return sdf.format(args[0].get());
                }
            }
            //无参,返回系统时间的格式化串
            else{
                Date date = new Date();
                SimpleDateFormat sdf = new SimpleDateFormat();
                sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
                return sdf.format(date);
            }
        }

        public String getDisplayString(String[] children) {
            return "toChar_xxx";
        }
    }


   2.将字符串转成日期进行输出

  
 @Description(name = "ToDate",
                   value = "toDate()",
                   extended = "toDate_xxxx-ext")
   public class ToDateUDF extends GenericUDF {

       public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
           return null;
       }

       public Object evaluate(DeferredObject[] args) throws HiveException {
           //有参数
           if(args != null && args.length != 0){
               //指定日志对象的格式化串
               if(args.length == 1){
                   SimpleDateFormat sdf = new SimpleDateFormat();
                   sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
                   try {
                       return sdf.parse((String)(args[0].get()));
                   } catch (ParseException e) {
                       e.printStackTrace();
                   }
               }
               //两个参数,Date date,String frt
               else{
                   SimpleDateFormat sdf = new SimpleDateFormat();
                   sdf.applyPattern((String)args[1].get());
                   try {
                       return sdf.parse((String)args[0].get());
                   } catch (ParseException e) {
                       e.printStackTrace();
                   }
               }
           }
           //无参,返回系统时间对象
           else{
               return new Date();
           }
           return null ;
       }

       public String getDisplayString(String[] children) {
           return "toChar_xxx";
       }
   }

   3.打jar包并添加到hive类路径
        $hive> ADD JAR /path/to/jar.jar;

   4.创建临时函数
        $hive> CREATE TEMPORARY FUNCTION tochar AS 'ts.demo.hive.ToCharUDF';
        $hive> CREATE TEMPORARY FUNCTION todate AS 'ts.demo.hive.ToDateUDF';

   5.使用
        $hive> select tochar();
        $hive> select todate('2018/09/10 12:00:00');

猜你喜欢

转载自blog.csdn.net/xcvbxv01/article/details/82625756