一、function 函数
--------------------------------------------------------------------
1.显示所有函数
$hive> show functions;
2.查看函数的帮助
$hive> desc function cos;
3.表生成函数
$hive> select explode(split(str, exp)); //先按exp切分str得到数组,再炸开成一条条记录(explode本身接收数组/映射)
二、UDF 用户自定义函数
-------------------------------------------------------------
1.创建类,继承UDF
/**
 * User-defined add function.
 *
 * Overloads evaluate() for two and three int arguments; Hive dispatches
 * to the matching overload by argument count.
 */
// @Description supplies the help text shown by `desc function myadd`.
// _FUNC_ is substituted by Hive with the name the function was registered under.
@Description(name = "myadd",
value = "_FUNC_(int a, int b) --> return a + b",
extended = "Example:\n"
+ " > _FUNC_(1,1) ==> 2 ;\n"
+ " > _FUNC_(1,1,1) ==> 3")
public class TsUDF extends UDF{
/**
 * Adds two ints.
 * @return a + b
 */
public int evaluate(int a, int b) {
    return a + b;
}
/**
 * Adds three ints.
 * @return a + b + c
 */
public int evaluate(int a, int b, int c) {
    return a + b + c;
}
}
2.打jar包
jar cvf my.jar -C target/classes .
3.添加jar包到hive的类路径(或者拷贝到hive/lib下)
$hive> ADD JAR /share/my.jar;
4.创建临时函数
$hive> CREATE TEMPORARY FUNCTION myadd AS 'ts.demo.hive.TsUDF';
5.在查询中使用自定义函数
$hive> select myadd(1,2);
三、GenericUDF 通用的UDF -- 定义NVL函数
-------------------------------------------------------------
1.自定义NVL函数
package ts.demo.hive;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
/**
 * Custom null-handling function (NVL): returns the first argument if it is
 * non-null, otherwise the second argument.
 */
@Description(name = "nvl",
value = "_FUNC_(value,default_value) - Returns default value if value"
+" is null else returns value",
extended = "Example:\n"
+ "  > SELECT _FUNC_(null,'bla') FROM src LIMIT 1;\n")
public class GenericUDFNvl extends GenericUDF {
// Resolves a single common ObjectInspector for the two arguments.
private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
private ObjectInspector[] argumentOIs;

@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
        throws UDFArgumentException {
    argumentOIs = arguments;
    // Exactly two arguments: the value and its default.
    if (arguments.length != 2) {
        throw new UDFArgumentLengthException(
                "The operator 'NVL' accepts 2 arguments.");
    }
    returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
    // Both arguments must share (or be convertible to) a common type.
    if (!(returnOIResolver.update(arguments[0]) && returnOIResolver
            .update(arguments[1]))) {
        // Arg indices are 0-based; the mismatching second argument is index 1
        // (original passed 2, which is out of range for a 2-arg function).
        throw new UDFArgumentTypeException(1,
                "The 1st and 2nd args of function NVL should have the same type, "
                + "but they are different: \"" + arguments[0].getTypeName()
                + "\" and \"" + arguments[1].getTypeName() + "\"");
    }
    return returnOIResolver.get();
}

/** Returns the first argument when non-null, otherwise the second. */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    Object retVal = returnOIResolver.convertIfNecessary(arguments[0].get(),
            argumentOIs[0]);
    if (retVal == null) {
        retVal = returnOIResolver.convertIfNecessary(arguments[1].get(),
                argumentOIs[1]);
    }
    return retVal;
}

/** Help string shown in EXPLAIN plans. */
@Override
public String getDisplayString(String[] children) {
    // Original concatenated "returns" directly against children[1];
    // trailing space added so the output reads "... is null returns <x>".
    return "if " + children[0] + " is null returns " + children[1];
}
}
2.打包
3.添加jar包到hive的类路径
$hive> ADD JAR /path/to/jar.jar;
4.创建临时函数
$hive> CREATE TEMPORARY FUNCTION nvl AS 'ts.demo.hive.GenericUDFNvl';
5.使用
$hive> select nvl( NULL, 5 );
$hive> select nvl( 1, 5 );
四、GenericUDF 通用的UDF -- 自定义日期函数
----------------------------------------------------------------
1.将日期转成字符串进行输出
@Description(name = "ToChar",
value = "使用方式如下:toChar()",
extended = "toChar_xxxx-ext")
public class ToCharUDF extends GenericUDF {
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
return null;
}
public Object evaluate(DeferredObject[] args) throws HiveException {
//有参数
if(args != null && args.length != 0){
//指定日志对象的格式化串
if(args.length == 1){
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
return sdf.format((Date)(args[0].get()));
}
//两个参数,Date date,String frt
else{
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern((String)args[1].get());
return sdf.format(args[0].get());
}
}
//无参,返回系统时间的格式化串
else{
Date date = new Date();
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
return sdf.format(date);
}
}
public String getDisplayString(String[] children) {
return "toChar_xxx";
}
}
2.将字符串转成日期进行输出
@Description(name = "ToDate",
value = "toDate()",
extended = "toDate_xxxx-ext")
public class ToDateUDF extends GenericUDF {
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
return null;
}
public Object evaluate(DeferredObject[] args) throws HiveException {
//有参数
if(args != null && args.length != 0){
//指定日志对象的格式化串
if(args.length == 1){
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern("yyyy/MM/dd hh:mm:ss");
try {
return sdf.parse((String)(args[0].get()));
} catch (ParseException e) {
e.printStackTrace();
}
}
//两个参数,Date date,String frt
else{
SimpleDateFormat sdf = new SimpleDateFormat();
sdf.applyPattern((String)args[1].get());
try {
return sdf.parse((String)args[0].get());
} catch (ParseException e) {
e.printStackTrace();
}
}
}
//无参,返回系统时间对象
else{
return new Date();
}
return null ;
}
public String getDisplayString(String[] children) {
return "toChar_xxx";
}
}
3.打包
4.添加jar包到hive的类路径
$hive> ADD JAR /path/to/jar.jar;
5.创建临时函数
$hive> CREATE TEMPORARY FUNCTION tochar AS 'ts.demo.hive.ToCharUDF';
$hive> CREATE TEMPORARY FUNCTION todate AS 'ts.demo.hive.ToDateUDF';
6.使用
$hive> select tochar();
$hive> select todate('2020/01/01 10:00:00');