join操作
1. 创建两张表
-- Two text-backed tables with the same (id, name) layout.
-- Each input line is split into columns on the ',' character.
create table t_a (
    id   int,
    name string
)
row format delimited
fields terminated by ',';

create table t_b (
    id   int,
    name string
)
row format delimited
fields terminated by ',';
a.txt内容如下:
1,aa
2,bb
3,cc
4,dd
5,ee
7,ff
b.txt内容如下:
2,qwer
3,earfe
9,oooo
2. 导入数据
-- "local" reads the files from the local file system rather than HDFS.
-- "into table" without "overwrite" appends to whatever the table already holds.
load data local inpath '/home/hadoop/a.txt'
    into table t_a;

load data local inpath '/home/hadoop/b.txt'
    into table t_b;
3. join操作
-- inner join: only ids present in BOTH t_a and t_b produce a row
select *
from t_a a
inner join t_b b
    on a.id = b.id;
+-------+---------+-------+---------+--+
| a.id | a.name | b.id | b.name |
+-------+---------+-------+---------+--+
| 2 | bb | 2 | qwer |
| 3 | cc | 3 | earfe |
+-------+---------+-------+---------+--+
2 rows selected (8.098 seconds)
-- left join: every row of t_a is kept; the b.* columns are NULL
-- when no row of t_b has the same id
select *
from t_a a
left join t_b b
    on a.id = b.id;
+-------+---------+-------+---------+--+
| a.id | a.name | b.id | b.name |
+-------+---------+-------+---------+--+
| 1 | aa | NULL | NULL |
| 2 | bb | 2 | qwer |
| 3 | cc | 3 | earfe |
| 4 | dd | NULL | NULL |
| 5 | ee | NULL | NULL |
| 7 | ff | NULL | NULL |
+-------+---------+-------+---------+--+
-- right join: every row of t_b is kept; the a.* columns are NULL
-- when no row of t_a has the same id
select *
from t_a a
right join t_b b
    on a.id = b.id;
+-------+---------+-------+---------+--+
| a.id | a.name | b.id | b.name |
+-------+---------+-------+---------+--+
| 2 | bb | 2 | qwer |
| 3 | cc | 3 | earfe |
| NULL | NULL | 9 | oooo |
+-------+---------+-------+---------+--+
-- full outer join: rows from both tables are kept; the side without a
-- matching id is filled with NULL. There is no ORDER BY, so the order of
-- the returned rows is not guaranteed.
select *
from t_a a
full outer join t_b b
    on a.id = b.id;
+-------+---------+-------+---------+--+
| a.id | a.name | b.id | b.name |
+-------+---------+-------+---------+--+
| 1 | aa | NULL | NULL |
| 5 | ee | NULL | NULL |
| NULL | NULL | 9 | oooo |
| 4 | dd | NULL | NULL |
| 3 | cc | 3 | earfe |
| 7 | ff | NULL | NULL |
| 2 | bb | 2 | qwer |
+-------+---------+-------+---------+--+
-- left semi join: returns the rows of t_a for which at least one row of
-- t_b has the same id (comparable to an IN / EXISTS subquery).
-- Hive only allows the right-hand table (b) to be referenced in the ON
-- clause, so the select list names the t_a columns explicitly instead of
-- relying on how "*" is expanded.
select
    a.id,
    a.name
from t_a a
left semi join t_b b
    on a.id = b.id;
+-------+---------+--+
| a.id | a.name |
+-------+---------+--+
| 2 | bb |
| 3 | cc |
+-------+---------+--+
2 rows selected (7.143 seconds)