hive静态分区表,动态分区表详解,案例demo
数据文本 student.txt(各字段之间须以制表符 \t 分隔,与下文建表语句中的 FIELDS TERMINATED BY '\t' 保持一致):
1 zhansgan 12 man
2 lisi 13 man
3 xiaohong 16 woman
静态分区:将数据指定分配到某个分区下。
创建静态分区表案例
-- Static-partition demo table.
-- student_age is a partition column: it is declared only in PARTITIONED BY,
-- and its value is supplied by the statement that loads the data.
CREATE TABLE student (
    id   STRING,
    name STRING,
    age  STRING,
    sex  STRING
)
PARTITIONED BY (student_age STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- Load the local file into the static partition student_age='12' of the
-- partitioned table `student` (the PARTITION clause is only valid for a
-- partitioned table). OVERWRITE replaces that partition's existing contents.
-- Every row in the file lands in this single partition regardless of its own
-- age column, because a static partition value is fixed by the statement.
LOAD DATA LOCAL INPATH '/root/student.txt'
OVERWRITE INTO TABLE student
PARTITION (student_age = '12');
静态分区表在 HDFS 上的文件存储形式如下:数据文件位于表目录下以"分区字段=分区值"命名的子目录中,例如 student/student_age=12/student.txt。
动态分区:根据数据的某个或某几个字段的值将数据动态分到某个分区下
创建动态分区表案例一:多个分区字段时,全部实现动态分区插入数据
-- Enable dynamic partitioning so partition values can be taken from query output.
set hive.exec.dynamic.partition=true;
-- nonstrict mode allows every partition column to be dynamic; strict mode
-- requires at least one static partition column in the INSERT.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Unpartitioned source (staging) table. Its rows are the input for the
-- INSERT ... SELECT statements that populate the dynamic-partition table.
CREATE TABLE student0 (
    id            STRING,
    name          STRING,
    age_partition STRING,
    sex_partition STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- Load the local file into the source table, replacing any existing rows.
LOAD DATA LOCAL INPATH '/root/student.txt'
OVERWRITE INTO TABLE student0;
-- Dynamic-partition target table, partitioned by sex_partition and then
-- age_partition.
-- Partition columns are declared only in PARTITIONED BY; they must not be
-- repeated in the regular column list, or Hive rejects the DDL.
-- Their values are supplied by the INSERT ... SELECT that fills the table,
-- matched by position rather than by column name.
CREATE TABLE student2 (
    id2   STRING,
    name2 STRING,
    age2  STRING,
    sex2  STRING
)
PARTITIONED BY (sex_partition STRING, age_partition STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- Fully dynamic insert from student0 into student2.
-- The first four SELECT columns fill id2, name2, age2 and sex2. The last two
-- supply the dynamic partition values; Hive matches them to the PARTITION
-- clause by position, so they must come last and in the same order
-- (sex_partition, then age_partition).
INSERT INTO TABLE student2 PARTITION (sex_partition, age_partition)
SELECT
    id,
    name,
    age_partition,
    sex_partition,
    sex_partition,
    age_partition
FROM student0;
-- Read back a single partition. The column list is spelled out in the same
-- order SELECT * would return it: regular columns first, partition columns last.
SELECT
    id2,
    name2,
    age2,
    sex2,
    sex_partition,
    age_partition
FROM student2
WHERE sex_partition = 'man'
  AND age_partition = '12';
动态分区表在 HDFS 上的文件存储形式如下:每个分区字段对应一级目录,按 PARTITIONED BY 中的顺序逐层嵌套,例如 student2/sex_partition=man/age_partition=12/。
创建动态分区表案例二:多个分区字段时,实现半自动分区(部分字段静态分区,注意静态分区字段要在动态前面)
-- Mixed static/dynamic insert from student0 into student2.
-- sex_partition is fixed to 'man' by the statement; age_partition is dynamic
-- and is taken from the last SELECT column. Static partition columns must be
-- listed before dynamic ones in the PARTITION clause.
-- The WHERE clause keeps rows of any other sex out of the 'man' partition;
-- without it every source row would be stored under sex_partition=man.
INSERT INTO TABLE student2 PARTITION (sex_partition = 'man', age_partition)
SELECT
    id,
    name,
    age_partition,
    sex_partition,
    age_partition
FROM student0
WHERE sex_partition = 'man';
动态分区表在 HDFS 上的文件存储形式如下:静态分区值固定为第一级目录,动态分区值在其下生成子目录,例如 student2/sex_partition=man/age_partition=13/。