HIVE UDF 空值转换(NULL 以及 空字符串)类似NVL+COALESCE

类似NVL,但功能更强大:NULL、空字符串以及仅由空格等空白字符组成的字符串都被视为空值,函数按参数顺序依次检查并返回首个非空值;若所有参数均为空,则返回NULL。参数个数不限,但至少需要两个。

一、代码

import org.apache.directory.api.util.Strings;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

@Description(
        name = "bvl",
        value = "_FUNC_(value1,value2,value3...) - If the value is null or an empty string, the next non null value will be returned until the last.",
        extended = "Example:\n  > SELECT _FUNC_(null,'','Hi');\n  Hi"
)
public class GenericUDFBvl extends GenericUDF {
    private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
    private transient ObjectInspector[] argumentOIs;

    public GenericUDFBvl() {
    }

    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        this.argumentOIs = arguments;
        if (arguments.length < 2) {
            throw new UDFArgumentLengthException("The function bvl takes at least 2 arguments, got " + arguments.length);
        } else {
            this.returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);

            for (int i = 0; i < arguments.length; ++i) {
                if (!this.returnOIResolver.update(arguments[i])) {
                    throw new UDFArgumentTypeException(i, "The expressions after BVL should all have the same type: \"" + this.returnOIResolver.get().getTypeName() + "\" is expected but \"" + arguments[i].getTypeName() + "\" is found");
                }
            }

            return this.returnOIResolver.get();
        }
    }

    public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
        for (int i = 0; i < arguments.length; ++i) {
            Object ai = arguments[i].get();
            if (ai == null || Strings.isEmpty(Strings.trim(ai.toString()))) {
                continue;
            }
            return this.returnOIResolver.convertIfNecessary(ai, this.argumentOIs[i]);
        }

        return null;
    }

    public String getDisplayString(String[] children) {
        return "BVL";
    }
}

二、测试类

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.junit.Assert;
import org.junit.Test;

/**
 * JUnit test for {@link GenericUDFBvl}: checks that null and whitespace-only
 * arguments are skipped and the first remaining value is returned.
 */
public class GenericUDFBvlTest {
    private final GenericUDFBvl bvl = new GenericUDFBvl();

    private final StringObjectInspector stringInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;

    @Test
    public void evaluate() throws Exception {
        // Four string-typed arguments, matching the four values supplied below.
        bvl.initialize(new ObjectInspector[] {stringInspector, stringInspector, stringInspector, stringInspector});

        // null and " " must be skipped; "Hi" is the first usable value, "Hello" is never reached.
        GenericUDF.DeferredObject[] inputs = new GenericUDF.DeferredObject[] {
                new GenericUDF.DeferredJavaObject(null),
                new GenericUDF.DeferredJavaObject(" "),
                new GenericUDF.DeferredJavaObject("Hi"),
                new GenericUDF.DeferredJavaObject("Hello")
        };

        Object result = bvl.evaluate(inputs);
        Assert.assertEquals("Hi", result.toString());
    }
}

猜你喜欢

转载自blog.csdn.net/qq_37771475/article/details/127785070