【hive】中各类join的使用

以下示例均是在hive中进行操作

创建表 a:

create table a(id int,name string)
row format delimited
fields terminated by ',';

构造数据(保存到本地文件 /root/a.dat):
1,zs
2,ls
3,ww

加载数据到表 a:

load data local inpath '/root/a.dat' into table a;

创建表 b:


create table b(id int,nickname string)
row format delimited
fields terminated by ',';

构造数据(保存到本地文件 /root/b.dat):
1,zzz
3,www
4,sss

将数据加载到表 b:

load data local inpath '/root/b.dat' into table b;


--两表的数据:
0: jdbc:hive2://cts03:10000> select * from a;
+-------+---------+--+
| a.id  | a.name  |
+-------+---------+--+
| 1     | zs      |
| 2     | ls      |
| 3     | ww      |
+-------+---------+--+


0: jdbc:hive2://cts03:10000> select * from b;
+-------+-------------+--+
| b.id  | b.nickname  |
+-------+-------------+--+
| 1     | zzz         |
| 3     | www         |
| 4     | sss         |
+-------+-------------+--+

-- 笛卡尔积:
0: jdbc:hive2://cts03:10000> select a.*,b.* from a join b;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 2     | ls      | 1     | zzz         |
| 3     | ww      | 1     | zzz         |
| 1     | zs      | 3     | www         |
| 2     | ls      | 3     | www         |
| 3     | ww      | 3     | www         |
| 1     | zs      | 4     | sss         |
| 2     | ls      | 4     | sss         |
| 3     | ww      | 4     | sss         |
+-------+---------+-------+-------------+--+

--内连接:
0: jdbc:hive2://cts03:10000> select a.*,b.* from a join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 3     | ww      | 3     | www         |
+-------+---------+-------+-------------+--+

--左外连接:左表的数据全部返回,作为查询的输入数据集;右表中没有匹配的列以NULL填充
0: jdbc:hive2://cts03:10000> select a.*,b.* from a left join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 2     | ls      | NULL  | NULL        |
| 3     | ww      | 3     | www         |
+-------+---------+-------+-------------+--+

--右外连接:右表的数据全部返回,作为查询的输入数据集;左表中没有匹配的列以NULL填充
0: jdbc:hive2://cts03:10000> select a.*,b.* from a right join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 3     | ww      | 3     | www         |
| NULL  | NULL    | 4     | sss         |
+-------+---------+-------+-------------+--+

--全外连接:两表的数据都需要返回作为查询的输入数据集
0: jdbc:hive2://cts03:10000> select a.*,b.* from a full join b on a.id=b.id;
+-------+---------+-------+-------------+--+
| a.id  | a.name  | b.id  | b.nickname  |
+-------+---------+-------+-------------+--+
| 1     | zs      | 1     | zzz         |
| 2     | ls      | NULL  | NULL        |
| 3     | ww      | 3     | www         |
| NULL  | NULL    | 4     | sss         |
+-------+---------+-------+-------------+--+

--左半连接(hive特有):按照内连接的规则进行连接,但是只返回左表的列作为查询的输入数据集(右表的列只能出现在on条件中,不能出现在select或where中)
0: jdbc:hive2://cts03:10000> select a.* from a left semi join b on a.id=b.id;
+-------+---------+
| a.id  | a.name  |
+-------+---------+
| 1     | zs      |
| 3     | ww      |
+-------+---------+

注: 左半连接是在MapReduce这种运算模型下对in/exists子查询的更高效实现,其结果等价于下面的in子查询写法:
select id,name from a where id in(select distinct id from b)

猜你喜欢

转载自blog.csdn.net/weixin_39227099/article/details/86664536
今日推荐