Hive 操作デモ

/usr の下で作業ディレクトリを作成します：mkdir wang

# Create the HDFS staging directory; -p makes the command a no-op when it already exists.
hadoop fs -mkdir -p /upload
# Grant the group write permission on the staging directory.
hadoop fs -chmod g+w /upload

データをアップロードする

[root@master wang]# hadoop fs -put emp.csv dept.csv /upload

サーバー：hiveserver2 &

クライアント：

匿名でログイン

# Connect to HiveServer2 on host "master", port 10000, database "default", without a user name.
beeline -u jdbc:hive2://master:10000/default

※ 必ず root ユーザーとしてログインしてください。そうしないと書き込み権限がありません。

# Connect to HiveServer2 on host "master", port 10000, database "default", as user root.
beeline -u jdbc:hive2://master:10000/default -n root

従業員テーブルを作成します。CREATE TABLE 文で入力ファイルの区切り文字を指定し、その後このテーブルにデータをロードします

-- Employee table; each input row is a text line whose fields are separated by commas.
create table emp001 (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int,
    deptno   int
)
row format delimited
fields terminated by ',';

部門テーブルを作成する

-- Department table; each input row is a text line whose fields are separated by commas.
create table dept001 (
    deptno int,
    dname  string,
    loc    string
)
row format delimited
fields terminated by ',';

データのインポート

-- Load the CSV files from the /upload directory into their tables.
load data inpath '/upload/emp.csv'
    into table emp001;
load data inpath '/upload/dept.csv'
    into table dept001;

従業員の部門番号に基づいてパーティションを作成します

-- Employee table partitioned by department number; deptno is the partition
-- column and therefore does not appear in the regular column list.
create table emp_part001 (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int
)
partitioned by (deptno int)
row format delimited
fields terminated by ',';

パーティションテーブルにデータを挿入します。インポートされたデータのパーティションを指定します（サブクエリを介してデータをインポートします）

-- Fill one static partition per department (10, 20, 30) from emp001.
insert into table emp_part001 partition (deptno=10)
select empno, ename, job, mgr, hiredate, sal, comm
from emp001
where deptno = 10;

insert into table emp_part001 partition (deptno=20)
select empno, ename, job, mgr, hiredate, sal, comm
from emp001
where deptno = 20;

insert into table emp_part001 partition (deptno=30)
select empno, ename, job, mgr, hiredate, sal, comm
from emp001
where deptno = 30;

バケットテーブルを作成し、従業員の職種（job）に基づいてバケットに分割します

-- Employee table clustered on the job column into 4 buckets.
create table emp_bucket001 (
    empno    int,
    ename    string,
    job      string,
    mgr      int,
    hiredate string,
    sal      int,
    comm     int,
    deptno   int
)
clustered by (job) into 4 buckets
row format delimited
fields terminated by ',';

サブクエリを介してデータを挿入します

-- Copy every employee row into the bucketed table. The columns are listed
-- explicitly, in the target table's order, so the insert does not depend on
-- emp001's column order staying unchanged.
insert into table emp_bucket001
select
    empno,
    ename,
    job,
    mgr,
    hiredate,
    sal,
    comm,
    deptno
from emp001;

従業員情報の照会：従業員番号、名前、給与

-- Employee number, name and salary for every employee.
select
    empno,
    ename,
    sal
from emp001;

マルチテーブル結合クエリ

-- Department name for each employee. An explicit inner join keeps the join
-- condition next to the join instead of in the where clause.
select
    dept001.dname,
    emp001.ename
from emp001
inner join dept001
    on emp001.deptno = dept001.deptno;

従業員数を数える

-- Number of employees (rows with a non-null empno).
select
    count(empno) as emp_num
from emp001;

重複する値を削除する

-- Department numbers that occur in emp001, without duplicates.
select distinct
    deptno
from emp001;

会社にある職種の数を問い合わせる

-- Number of different job titles.
select
    count(distinct job)
from emp001;

1981年に雇用された人の総数を数える

-- Number of employees whose hiredate string contains '1981'.
-- Rows matched by the like filter always have a non-null hiredate,
-- so counting rows gives the same result as counting hiredate.
select
    count(*) as result
from emp001
where hiredate like '%1981%';

各部門の総給与の統計

-- Total salary per department.
select
    deptno,
    sum(sal)
from emp001
group by deptno;

各ポジションにいる従業員の数を数えます

-- Head count per job title, smallest group first (ascending is the default order).
select
    job,
    count(*) as emp_num
from emp001
group by job
order by emp_num;

入社日が最も早い従業員を問い合わせる

-- Employee(s) with the earliest hire date.
-- The subquery yields a single row holding the minimum hiredate; matching on it
-- in the ON clause avoids a join without a condition (a cartesian product).
-- hiredate is a string, so min() compares text; this assumes a sortable
-- format such as yyyy-MM-dd (TODO confirm against the data).
select
    emp001.ename,
    emp001.hiredate
from emp001
inner join (
    select min(hiredate) as min_hiredate
    from emp001
) earliest
    on emp001.hiredate = earliest.min_hiredate;

給与水準の判断

-- Classify each salary: below 2000 is 'low', 2000 up to (not including) 3000
-- is 'middle', everything else is 'high'. Highest salary first.
select
    ename,
    empno,
    sal,
    case
        when sal < 2000 then 'low'
        when sal >= 2000 and sal < 3000 then 'middle'
        else 'high'
    end as level
from emp001
order by sal desc;

役職に応じて昇給し、昇給前後の給与を表示します。

-- Show the current salary (sal) next to the salary after a raise (raised_sal).
-- PRESIDENT gets +1000, MANAGER gets +800, every other job gets +400.
select
    empno,
    ename,
    job,
    sal,
    case job
        when 'PRESIDENT' then sal + 1000
        when 'MANAGER' then sal + 800
        else sal + 400
    end as raised_sal
from emp001;

上半期（1〜6月）に入社した従業員が最も多い地域を求める

-- Location with the most employees hired in months 1-6.
-- substr() takes two characters of hiredate starting at position 6 and cast()
-- converts them to an int; this assumes hiredate looks like yyyy-MM-dd so that
-- those characters are the month (TODO confirm against the data).
select
    hires.loc,
    count(*) as emp_count
from (
    select
        dept001.loc,
        emp001.ename,
        cast(substr(emp001.hiredate, 6, 2) as int) as hire_month
    from dept001
    inner join emp001
        on dept001.deptno = emp001.deptno
) hires
where hires.hire_month <= 6
group by hires.loc
order by emp_count desc
limit 1;

Hive インスタンス操作

Hive 操作デモ

おすすめ