Hive instance operations

Hive operation demo

Under /usr, create a working directory: mkdir wang

# Create the HDFS staging directory and give the group write permission,
# so the Hive user can later LOAD DATA from it.
hadoop fs -mkdir /upload
hadoop fs -chmod g+w /upload

Upload the data files to HDFS:

[root@master wang]# hadoop fs -put emp.csv /upload
[root@master wang]# hadoop fs -put dept.csv /upload

On the server, start HiveServer2 in the background: hiveserver2 &

Client:

Log in anonymously:

beeline -u jdbc:hive2://master:10000/default

Note: log in as the root user; otherwise we have no permission to write.

beeline -u jdbc:hive2://master:10000/default -n root

Create an employee table, use the statement of creating a table to specify the input file separator, and then load data to this table

-- Employee table. The CSV fields are split on commas
-- (row format delimited fields terminated by ',').
-- NOTE: the original trailing '#' comment is not valid HiveQL
-- (Hive only supports '--' line comments), so it is replaced here.
create table emp001(
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int,
    deptno   int
)
row format delimited fields terminated by ',';

Create department table

-- Department table: department number, name and location,
-- loaded from a comma-separated CSV file.
create table dept001(
    deptno int,
    dname  string,
    loc    string
)
row format delimited fields terminated by ',';

Import Data

-- NOTE: LOAD DATA ... INPATH *moves* (not copies) the HDFS files into the
-- table's warehouse directory, so /upload will no longer contain them.
load data inpath '/upload/emp.csv' into table emp001;
load data inpath '/upload/dept.csv' into table dept001;

Create partitions based on the employee's department number

-- Employee table partitioned by department number. deptno is the
-- partition column, so it is not repeated in the regular column list.
create table emp_part001(
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int
)
partitioned by (deptno int)
row format delimited fields terminated by ',';

Insert data into the partition table: specify the partition of the imported data (import data through a subquery)

-- Static-partition inserts: each statement writes one department's rows
-- into its own partition (deptno=10/20/30). The partition column is set
-- in the PARTITION clause, so it is excluded from the SELECT list.
insert into table emp_part001 partition(deptno=10) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=10;
insert into table emp_part001 partition(deptno=20) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=20;
insert into table emp_part001 partition(deptno=30) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=30;

Create a bucket table and divide buckets according to the employee's job (job)

-- Bucketed employee table: rows are hashed on job into 4 buckets.
create table emp_bucket001 (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int,
    deptno   int
)
clustered by (job) into 4 buckets
row format delimited fields terminated by ',';

Insert data through subquery

-- Fill the bucketed table from emp001. An explicit column list replaces
-- the original SELECT *, so the statement keeps working (or fails loudly)
-- if either table's column order ever changes.
insert into table emp_bucket001
select empno, ename, job, mgr, hiredate, sal, comm, deptno
from emp001;

Query employee information: employee number, name, salary

-- Employee number, name and salary for every employee.
select empno,
       ename,
       sal
from emp001;

Multi-table join query

-- Department name and employee name. The original implicit comma join
-- (FROM a, b WHERE ...) is replaced with an explicit ANSI JOIN ... ON,
-- which cannot silently become a cross join if the filter is dropped.
select dept001.dname,
       emp001.ename
from emp001
join dept001
  on emp001.deptno = dept001.deptno;

Number of employees

-- Total head count (rows with a non-null employee number).
select count(empno) as emp_num
from emp001;

Remove duplicate values

-- Distinct department numbers; GROUP BY yields one row per value,
-- exactly like SELECT DISTINCT.
select deptno
from emp001
group by deptno;

Query how many types of positions the company has

-- Number of distinct job titles in the company.
-- (count(distinct job) ignores rows where job is NULL.)
select count(distinct job) from emp001;

Count the total number of people who were employed in 1981

-- Head count of employees hired in 1981. count(*) counts the matching
-- rows directly (the LIKE filter already excludes NULL hire dates, so
-- the result equals the original count(hiredate)).
-- NOTE(review): assumes the year appears literally inside the hiredate
-- string (e.g. '1981/2/20') -- confirm against the CSV format.
select count(*) as result
from emp001
where hiredate like '%1981%';

Statistics of the total salary of each department

-- Total salary bill per department.
select deptno,
       sum(sal)
from emp001
group by deptno;

Count how many employees are in each position

-- Head count per job title, smallest group first.
select job,
       count(*) as emp_num
from emp001
group by job
order by emp_num asc;

Query the earliest employee

-- Employee(s) with the earliest hire date: compute the minimum hire
-- date in a one-row subquery, then keep the matching employees.
select ename, hiredate
from emp001
join (select min(hiredate) as min_hiredate from emp001) earliest
where hiredate = earliest.min_hiredate;

Judging the salary level

-- Salary band for each employee, highest paid first:
-- below 2000 is 'low', 2000-2999 is 'middle', 3000 and up is 'high'.
select ename,
       empno,
       sal,
       case
           when sal < 2000 then 'low'
           when sal >= 2000 and sal < 3000 then 'middle'
           else 'high'
       end as level
from emp001
order by sal desc;

Give employees a salary increase according to the position, and display the salary before and after the increase

-- Job-based raise: show the current salary next to the raised salary.
-- The CASE result is now aliased; the original left it unnamed, so it
-- surfaced as an auto-generated column such as _c4.
select empno,
       ename,
       job,
       sal,
       case job
           when 'PRESIDENT' then sal + 1000
           when 'MANAGER'   then sal + 800
           else sal + 400
       end as new_sal
from emp001;

Find the location with the most employees hired in the first half of the year

-- Location with the most first-half (months 1-6) hires.
-- The original '#' comments are not valid HiveQL (Hive only supports
-- '--' line comments) and are translated here:
--   cast(... as int)       converts the extracted month text to a number
--   substr(hiredate, 6, 2) takes the month portion of the date string
-- NOTE(review): assumes hiredate is 'yyyy/mm/...' with the month at
-- positions 6-7 -- confirm against the CSV. On ties, LIMIT 1 returns an
-- arbitrary one of the tied locations.
select hires.loc,
       count(*) as emp_count
from (select dept001.loc,
             emp001.ename,
             cast(substr(emp001.hiredate, 6, 2) as int) as hire_month
      from dept001
      join emp001
        on dept001.deptno = emp001.deptno) hires
where hires.hire_month <= 6
group by hires.loc
order by emp_count desc
limit 1;

Guess you like

Origin blog.csdn.net/qq_46009608/article/details/112796151