Hive常用核心技能

@羲凡——只为了更好的活着
Hive是一个非常常用的数据仓库工具，几乎每家公司都会用到，只是使用深浅有所区别。下面我总结一下自己用到的一些Hive知识点。

1.创建删除库和表

-- Create the database (no-op if it already exists).
CREATE DATABASE IF NOT EXISTS hivetest;
-- Drop the database; this form only succeeds when the database contains no tables.
DROP DATABASE IF EXISTS hivetest;
-- Drop the database together with every table it contains.
DROP DATABASE IF EXISTS hivetest CASCADE;

-- Create a plain-text table: tab-separated fields, one row per line.
CREATE TABLE IF NOT EXISTS hivetest.staff (
    eid    INT,
    name   STRING,
    age    INT,
    salary INT
)
COMMENT 'Aaron hive test table'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;

-- Create a partitioned table: login_year and login_month are partition columns,
-- declared in PARTITIONED BY rather than in the regular column list.
-- NOTE(review): use_id looks like a typo for user_id -- confirm before renaming.
CREATE TABLE IF NOT EXISTS hivetest.user_login (
    use_id     STRING,
    login_time TIMESTAMP
)
COMMENT 'Aaron hive test partition table'
PARTITIONED BY (login_year INT, login_month INT)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Drop the table (no-op if it does not exist).
DROP TABLE IF EXISTS hivetest.staff;

2.加载清除数据

-- With LOCAL the path is read from the local filesystem; without LOCAL it is an HDFS path.
-- Append the local file(s) to the table.
LOAD DATA LOCAL INPATH '/data/staff' INTO TABLE hivetest.staff;
-- Replace the table's current contents with the local file(s).
LOAD DATA LOCAL INPATH '/data/staff' OVERWRITE INTO TABLE hivetest.staff;
-- Load from HDFS into a single partition, replacing that partition's contents.
-- (The path literal must not carry a trailing space, or the path will not resolve.)
LOAD DATA INPATH '/data/user_login' OVERWRITE INTO TABLE hivetest.user_login PARTITION (login_year = 2019, login_month = 1);

-- Remove all rows from the table; the table definition itself is kept.
TRUNCATE TABLE hivetest.staff;
-- Drop partitions matching the spec. Only login_year is given, so this is a partial
-- spec -- presumably every login_month partition under 2019 is dropped; verify on
-- your Hive version before running against real data.
ALTER TABLE hivetest.user_login DROP IF EXISTS PARTITION (login_year = 2019);

3.插入数据

-- hivetest.staff2 must already exist before either statement is run.
-- Append the rows of staff to staff2.
INSERT INTO TABLE hivetest.staff2
SELECT * FROM hivetest.staff;
-- Replace the contents of staff2 with the rows of staff.
INSERT OVERWRITE TABLE hivetest.staff2
SELECT * FROM hivetest.staff;

-- Insert into a partitioned table. The statement needs the TABLE keyword and a
-- PARTITION (...) clause written with ASCII parentheses.
-- NOTE(review): the definition of hivetest.staff3 is not shown here; adjust the
-- SELECT lists below so their columns line up with staff3's columns.

-- Static partitioning: both partition values are fixed in the statement.
INSERT OVERWRITE TABLE hivetest.staff3 PARTITION (year = '2019', month = '01')
SELECT * FROM hivetest.staff;

-- Mixed: year is static, month is dynamic. Dynamic partition values are taken from
-- the trailing column(s) of the SELECT, so the query must end with the month value.
INSERT OVERWRITE TABLE hivetest.staff3 PARTITION (year = '2018', month)
SELECT * FROM hivetest.staff;

-- Fully dynamic: the SELECT must end with the year and month values, in that order.
-- Strict mode rejects inserts with no static partition column, hence nonstrict.
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE hivetest.staff3 PARTITION (year, month)
SELECT * FROM hivetest.staff;

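动态插入分区表时，需要将hive.exec.dynamic.partition参数设成true(Hive 0.9.0之前默认为false，0.9.0及之后默认为true)；如果所有分区字段都是动态的，还需要将hive.exec.dynamic.partition.mode设成nonstrict(默认为strict)。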

4.传递参数

-- hivetest.hql
-- Submit the job to the root.default queue.
SET mapreduce.job.queuename=root.default;
-- Cap each task JVM's heap at 2048 MB.
SET mapred.child.java.opts=-Xmx2048M;
-- ${hivevar:age} is replaced with the value passed on the hive command line
-- via the hivevar option before the query is run.
SELECT * FROM aarontest.stu_info WHERE age >= '${hivevar:age}';

# hivetest.sh
# Usage: sh hivetest.sh <age>
# Passes the first argument to Hive as the variable "age", runs hivetest.hql in
# silent mode and writes the query output to hivetest.out.
agevar="$1"
# Quote the expansion so an argument containing spaces or glob characters is
# passed to hive as a single key=value word.
hive --hivevar age="$agevar" -S -f hivetest.hql > hivetest.out

5.创建函数

方式一，脚本中添加jar包，创建临时函数

-- Register the UDF jar, then bind the temporary function name hive_f to the
-- UDF class it contains.
ADD JAR libs/hivejar-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION hive_f AS 'com.hive.HiveUDF';

方式二，将jar包放在hdfs上，创建永久函数

-- Permanent function: attach the HDFS jar with USING JAR so the jar location is
-- stored with the function and can be loaded by later sessions. A separate
-- ADD JAR only affects the session that runs it.
CREATE FUNCTION hive_f AS 'com.hive.HiveUDF'
USING JAR 'hdfs://ns/libs/hivejar-1.0-SNAPSHOT.jar';

6.修改location

-- Show the table's DDL, including the LOCATION it points to.
SHOW CREATE TABLE tablename;

修改的两种方式
a.针对某个表修改

-- Point a single table at a new storage path.
-- NOTE(review): this is expected to change metadata only (existing files are not
-- moved, and partitions that already exist keep their own locations) -- confirm.
ALTER TABLE t_m_cc SET LOCATION 'hdfs://heracles/user/video-mvc/hive/warehouse/t_m_cc';

b.针对某个库或者全部数据库
直接修改hive元数据库的DBS表(存放库所在地址)和SDS表(存放表所在地址)

-- Run these in the metastore database itself, not in Hive. Back it up first:
-- both statements intentionally have no WHERE clause and rewrite every row.
-- 'oldpath' and 'newpath' are placeholders for the real path prefixes.
UPDATE DBS SET DB_LOCATION_URI = REPLACE(DB_LOCATION_URI, 'oldpath', 'newpath');
UPDATE SDS SET location = REPLACE(location, 'oldpath', 'newpath');

之前公司集群不是高可用的，现在改成高可用后需要修改hive全部的元数据所在地址，我执行如下

-- Rewrite every database and storage location from the old single-NameNode URI
-- to the HA nameservice URI. Run in the metastore database after backing it up;
-- both statements intentionally have no WHERE clause.
UPDATE DBS SET DB_LOCATION_URI = REPLACE(DB_LOCATION_URI, 'hdfs://deptest1:9000', 'hdfs://ns');
UPDATE SDS SET location = REPLACE(location, 'hdfs://deptest1:9000', 'hdfs://ns');

===================================================================

@羲凡——只为了更好的活着

若对博客内容有任何问题，欢迎留言交流