大数据||Hive中常用的查询

Hive中的查询 Select


查询语法

SELECT [ALL | DISTINCT] select_expr, select_expr, ...
   FROM table_reference
   [WHERE where_condition]
   [GROUP BY col_list]
   [ORDER BY col_list]
   [CLUSTER BY col_list
     | [DISTRIBUTE BY col_list] [SORT BY col_list]
   ]
  [LIMIT [offset,] rows]

基本查询

 
 
select * from emp;
select t.empno,t.ename,t.deptno from emp t;

limit

select * from emp limit 5;


区间查询between and 

select * from emp e where e.sal between 10 and 1500;

空查询is null 

select t.empno,t.ename,t.deptno from emp t where t.deptno is null;

不空查询is not null 

select t.empno,t.ename,t.deptno from emp t where t.deptno is not null;

集合查询in 

select t.empno,t.ename,t.deptno from emp t where t.ename in ('lizh','forrest');

不在集合范围内not in 

select t.empno,t.ename,t.deptno from emp t where t.ename not in ('lizh','forrest');


函数查询

最高工资max

select max(sal) from emp e; 

最低工资min

select min(sal) from emp e;  

总人数count

select count(*) from emp e;

公司月总支出sum

select sum(sal) from emp e;

平均工资avg

select avg(sal) from emp e;  

高级查询


分组group by  

每个部门的平均工资

select e.deptno,avg(e.sal) from emp e  group by e.deptno;

having

where:是针对单条记录进行筛选。

having:是针对分组后的数据进行筛选。

求每个部门平均工资大于2000的数据

select e.deptno,avg(e.sal) avgsal from emp e  group by e.deptno having avgsal>2000;

join

两个表进行连接,例如有两个表m n ,m表中的一条记录和n表中的一条记录组成一条记录。

join on :等值连接

 
 
bin/hive -e 'select e.empno,e.ename, d.deptno,d.dname from emp e join dept d on e.deptno = d.deptno;'

left join:左连接

说明:左连接表示以join左边数据为主,若join右边的数据不存在则补空。

bin/hive -e 'select e.empno,e.ename, d.deptno,d.dname from emp e left join  dept d on e.deptno = d.deptno;'

right join:右连接

说明:右连接表示以join右边数据为主,若join左边的数据不存在则补空。

bin/hive -e 'select e.empno,e.ename, d.deptno,d.dname from emp e right join  dept d on e.deptno = d.deptno;'




猜你喜欢

转载自blog.csdn.net/qq_16095837/article/details/79465594