版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/weixin_38750084/article/details/86028352
1.创建为maven项目
2.jdk用的1.8
3.代码:
java:
package com.huayong;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.HashSet;
import java.util.Set;
/**
* Created by tang on 2019/01/07
*/
/**
 * Hive simple UDF that appends the literal suffix {@code "____udf"} to its
 * string input. Register it in Hive with, e.g.:
 * {@code CREATE TEMPORARY FUNCTION doubleMinSalary AS 'com.huayong.Udf_doubleMinSalary';}
 *
 * Created by tang on 2019/01/07
 */
public class Udf_doubleMinSalary extends UDF {
    /**
     * Appends {@code "____udf"} to the input value.
     *
     * @param a the input column value; may be {@code null}
     * @return {@code a + "____udf"}, or {@code null} when the input is
     *         {@code null} — follows Hive's null-in/null-out convention
     *         (the previous version concatenated and produced "null____udf")
     */
    public String evaluate(String a) {
        if (a == null) {
            return null;
        }
        return a + "____udf";
    }

    /** Quick local smoke test; never invoked by Hive. */
    public static void main(String[] args) {
        System.out.println(new Udf_doubleMinSalary().evaluate("6"));
    }
}
pom:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>huayong</groupId>
  <artifactId>udf_doubleMinSalary</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <dependencies>
    <!-- Hive/Hadoop are present on the cluster at runtime: mark them
         'provided' so the shade plugin does not bundle them into the UDF jar
         (smaller jar, no classpath conflicts with the cluster's versions). -->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>1.1.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
      <scope>provided</scope>
    </dependency>
    <!-- JUnit is only needed for tests; 'test' scope keeps it out of the jar. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
    <!-- tools.jar workaround for JDK 8 builds (not needed on JDK 9+). -->
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.8</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- Build an uber-jar; strip signature files so the merged jar is not
           rejected with "Invalid signature file digest". -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>2.2</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
4.打包:项目上右键---run as----maven install
5.上传到linux一个目录如:/var/lib/hadoop-hdfs/spride_sqoop_beijing/udf_jar
6.创建udf函数:
add jar /var/lib/hadoop-hdfs/spride_sqoop_beijing/udf_jar/udf_doubleMinSalary-0.0.1-SNAPSHOT.jar;
创建一个临时函数,函数名为 doubleMinSalary:
create temporary function doubleMinSalary as 'com.huayong.Udf_doubleMinSalary';
udf 创建永久函数:
先把包传到hdfs:
hadoop fs -put /var/lib/hadoop-hdfs/spride_sqoop_beijing/udf_jar/udf_hive2kafka-0.0.1-SNAPSHOT.jar /user/hive/warehouse/ods.db/udf_jar/udf_hive2kafka-0.0.1-SNAPSHOT.jar
然后创建永久函数
CREATE FUNCTION udf_hive2kafka AS 'com.huayong.Hive2KakfaUDF'
USING JAR 'hdfs:///user/hive/warehouse/ods.db/udf_jar/udf_hive2kafka-0.0.1-SNAPSHOT.jar';
show functions;
执行sql如:
-- Example: push contact rows to Kafka through the udf_hive2kafka UDF.
-- Inner query samples 10000 rows and assigns each a group id g in [0, 100)
-- by hashing r1; the outer query collects each group's rows into a list of
-- maps and hands the batch to the UDF (broker 'lienidata001:9092',
-- topic 'bobizlist_tangzhanbo'), so at most 100 UDF calls are made.
SELECT g,default.udf_hive2kafka('lienidata001:9092','bobizlist_tangzhanbo',collect_list(map(
'bo_id',bo_id,
'full_name', full_name,
'simple_name',simple_name,
'source',source,
'company_id',company_id,
'contact',contact,
'position',position,
'mobile_phone',mobile_phone,
'phone',phone,
'email',email,
'contact_source',contact_source,
'request_host',request_host,
'request_url',request_url,
'insert_time',insert_time
))) AS result
FROM
(
-- pmod(ABS(hash(r1)),100) buckets rows into 100 groups for batching.
SELECT r1,pmod(ABS(hash(r1)),100) AS g,bo_id,full_name,simple_name,source,company_id,contact,position,mobile_phone,phone,email,contact_source,request_host,request_url,insert_time
FROM dws_bo_final_spider_contact
LIMIT 10000
) tmp
GROUP BY g;