Hive operation demo
Under /usr, create a working directory named wang (mkdir wang); then create the upload directory on HDFS:
# Create the HDFS staging directory for the CSV files.
# -p makes the command succeed even when /upload already exists, so the demo can be re-run.
hadoop fs -mkdir -p /upload
# Add group write permission to the staging directory.
hadoop fs -chmod g+w /upload
Upload the data files to HDFS:
# Copy the two local CSV files from the current directory into /upload on HDFS.
[root@master wang]# hadoop fs -put emp.csv /upload
[root@master wang]# hadoop fs -put dept.csv /upload
Server: hiveserver2 &
Client:
Log in anonymously:
# Connect to the default database of HiveServer2 at master:10000; no user name is given.
beeline -u jdbc:hive2://master:10000/default
Log in as root: we connect as the root user, because otherwise the session has no permission to write.
# Connect to the default database of HiveServer2 at master:10000 as user root (-n sets the user name).
beeline -u jdbc:hive2://master:10000/default -n root
Create the employee table. The CREATE TABLE statement specifies the field separator of the input file; the data is loaded into this table afterwards.
-- Employee table backed by delimited text.
-- Fields are separated by commas, matching the layout of emp.csv.
-- hiredate is kept as a string; later queries slice the year and month out of it.
create table emp001 (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int,
    deptno   int
)
row format delimited
fields terminated by ',';
Create department table
-- Department table backed by comma-delimited text (layout of dept.csv).
create table dept001 (
    deptno int,
    dname  string,
    loc    string
)
row format delimited
fields terminated by ',';
Import Data
-- Load the staged CSV files into their tables.
-- There is no LOCAL keyword, so the paths refer to HDFS; Hive moves the files
-- into the table location, so they no longer remain under /upload afterwards.
load data inpath '/upload/emp.csv'
into table emp001;

load data inpath '/upload/dept.csv'
into table dept001;
Create a table partitioned by the employee's department number
-- Employee table partitioned by department number.
-- deptno is the partition column, so it is declared in PARTITIONED BY
-- rather than in the regular column list.
create table emp_part001 (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int
)
partitioned by (deptno int)
row format delimited
fields terminated by ',';
Insert data into the partitioned table: each statement names the target partition explicitly and fills it from a query on emp001
-- Populate emp_part001 one static partition at a time.
-- deptno is supplied by the PARTITION clause, so it is left out of each select list.
insert into table emp_part001 partition (deptno = 10)
select empno, ename, job, mgr, hiredate, sal, comm
from emp001
where deptno = 10;

insert into table emp_part001 partition (deptno = 20)
select empno, ename, job, mgr, hiredate, sal, comm
from emp001
where deptno = 20;

insert into table emp_part001 partition (deptno = 30)
select empno, ename, job, mgr, hiredate, sal, comm
from emp001
where deptno = 30;
Create a bucketed table, distributing rows into buckets by the employee's job (column job)
-- Employee table whose rows are clustered by job into 4 buckets.
create table emp_bucket001 (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int,
    deptno   int
)
clustered by (job) into 4 buckets
row format delimited
fields terminated by ',';
Insert data through subquery
-- Copy every employee row into the bucketed table.
-- Columns are listed explicitly, in the order emp_bucket001 declares them, so the
-- statement does not silently depend on the two tables keeping the same column order.
-- NOTE(review): on Hive versions before 2.0, bucketing on insert may need
-- `set hive.enforce.bucketing=true;` first; confirm against the cluster version.
insert into table emp_bucket001
select
    empno,
    ename,
    job,
    mgr,
    hiredate,
    sal,
    comm,
    deptno
from emp001;
Query employee information: employee number, name, salary
-- Employee number, name and salary for every employee.
select
    empno,
    ename,
    sal
from emp001;
Multi-table join query
-- Each employee's name paired with the name of their department.
-- Written as an explicit inner join so the join condition lives in ON
-- instead of relying on a comma join filtered in WHERE.
select
    dept001.dname,
    emp001.ename
from emp001
inner join dept001
    on emp001.deptno = dept001.deptno;
Number of employees
-- Head count; count(empno) counts only rows whose empno is not null.
select
    count(empno) as emp_num
from emp001;
Remove duplicate values
-- Department numbers that appear in emp001, each listed once.
select distinct
    deptno
from emp001;
Query how many types of positions the company has
-- Number of different job titles present in emp001.
select
    count(distinct job)
from emp001;
Count the total number of employees who were hired in 1981
-- Number of employees hired in 1981.
-- hiredate is a string that starts with the four-digit year (the month is read from
-- positions 6-7 elsewhere in this file), so the pattern is anchored to the start.
-- An unanchored '%1981%' would also match "1981" appearing anywhere else in the value.
select
    count(*) as result
from emp001
where hiredate like '1981%';
Compute the total salary of each department
-- Total salary per department number.
select
    deptno,
    sum(sal)
from emp001
group by deptno;
Count how many employees are in each position
-- Number of employees per job title, smallest group first.
select
    job,
    count(*) as emp_num
from emp001
group by job
order by emp_num asc;
Query the earliest-hired employee
-- Employee(s) with the earliest hire date.
-- The equality is expressed as the join condition (ON) rather than as a WHERE filter
-- over a join with no condition, which is a cartesian product that Hive rejects when
-- strict checks are enabled.
-- NOTE(review): hiredate is a string, so min() compares text; this gives the earliest
-- date only if values are year-first and zero-padded. Confirm against emp.csv.
select
    emp001.ename,
    emp001.hiredate
from emp001
inner join (
    select min(hiredate) as min_hiredate
    from emp001
) earliest
    on emp001.hiredate = earliest.min_hiredate;
Classify each employee's salary level
-- Label each salary as low (< 2000), middle (2000 to 2999) or high (everything else),
-- listing the highest salaries first.
select
    ename,
    empno,
    sal,
    case
        when sal < 2000 then 'low'
        when sal >= 2000 and sal < 3000 then 'middle'
        else 'high'
    end as level
from emp001
order by sal desc;
Give employees a salary increase according to the position, and display the salary before and after the increase
-- Show each employee's current salary next to the salary after a job-based raise:
-- PRESIDENT +1000, MANAGER +800, every other job +400.
-- The computed column is aliased so the result set has a readable header instead of
-- an auto-generated column name.
select
    empno,
    ename,
    job,
    sal,
    case job
        when 'PRESIDENT' then sal + 1000
        when 'MANAGER' then sal + 800
        else sal + 400
    end as sal_after_raise
from emp001;
Find the region (department location) with the largest number of employees hired in the first half of the year
-- Location with the most employees whose hire month is January through June.
-- substr(hiredate, 6, 2) extracts the two characters starting at position 6 (the month),
-- and cast(... as int) converts that text to a number so it can be compared with 6.
-- NOTE(review): this assumes hiredate has a zero-padded month at positions 6-7
-- (for example yyyy-MM-dd); confirm against emp.csv.
-- If several locations tie for first place, which one is returned is not defined.
select
    hires.loc,
    count(*) as emp_count
from (
    select
        dept001.loc,
        cast(substr(emp001.hiredate, 6, 2) as int) as hire_month
    from dept001
    join emp001
        on dept001.deptno = emp001.deptno
) hires
where hires.hire_month <= 6
group by hires.loc
order by emp_count desc
limit 1;