介绍Hive命令行的一个基本使用
注意:使用命令行时,每次操作语句都要以 ; 结尾
###交互命令
#查看帮助
bin/hive -help
# -e 不进入hive提示窗口执行sql语句
bin/hive -e "select id from student;"
# -f 执行脚本中sql语句
bin/hive -f /opt/hivef.sql
###hive提示窗口命令
#进入提示窗口
bin/hive
#在hive提示窗口命令中如何查看hdfs文件系统
hive(default)>dfs -ls /;
#在hive提示窗口命令中如何查看本地文件系统
hive(default)>! ls /opt/module/datas;
#显示出当前使用的数据库
hive> set hive.cli.print.current.db=true;
hive (testdb)>
#退出提示窗口
hive(default)>exit;
hive(default)>quit;
在用命令行操作hive时,需要在hive安装目录的bin子目录下执行如下命令:
hive
一、数据库操作(DDL)
- 显示数据库
#查看数据库
show databases;
hive> show databases;
OK
default
testdb
Time taken: 0.007 seconds, Fetched: 2 row(s)
#过滤显示查询的数据库(造了好多,独宠哪一个)
show databases like '数据库名';
hive> show databases like 'testdb';
OK
testdb
Time taken: 0.054 seconds, Fetched: 1 row(s)
hive>
- 查看数据库信息
#显示数据库信息
desc database DatabaseName;
hive> desc database testdb;
OK
testdb hdfs://hadoop01:9000/user/hive/warehouse/testdb.db root USER
Time taken: 0.022 seconds, Fetched: 1 row(s)
#显示数据库详细信息
desc database extended DatabaseName;
#注意:下面的示例实际执行的是 describe formatted 表名,输出的是 testdb 表的详细信息,而不是数据库的详细信息
hive> describe formatted testdb;
OK
# col_name data_type comment
name string
des string
# Detailed Table Information
Database: testdb
Owner: root
CreateTime: Thu Mar 26 18:17:32 CST 2020
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://hadoop01:9000/user/hive/warehouse/testdb.db/testdb
Table Type: MANAGED_TABLE
Table Parameters:
transient_lastDdlTime 1585217852
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim
serialization.format
Time taken: 0.087 seconds, Fetched: 28 row(s)
- 创建数据库
#创建数据库
create database if not exists DatabaseName;
hive> create database if not exists testdb;
OK
Time taken: 0.114 seconds
hive> show databases;
OK
default
testdb
Time taken: 0.007 seconds, Fetched: 2 row(s)
- 切换当前数据库
#切换当前数据库
use DatabaseName;
hive> use testdb;
OK
Time taken: 0.008 seconds
- 删除数据库
#删除数据库
drop database if exists DatabaseName;
hive> drop database if exists testdb;
OK
Time taken: 0.179 seconds
hive> show databases;
OK
default
Time taken: 0.007 seconds, Fetched: 1 row(s)
#数据库中若有表,则不能删除
#若想删除,使用命令:drop database if exists testdb cascade;
二、表操作(DDL/DML)
- 显示当前数据库中所有的表
#显示当前数据库中所有的表
show tables;
- 创建表
#创建表
create table if not exists 表名(
列名1 数据类型,
列名N 数据类型
)
row format DELIMITED
FIELDS terminated BY '\t' -- 用来设置列之间的分隔符,例如 \t(行之间的分隔符默认为 \n)
STORED AS TEXTFILE;
hive> create table testdb
> (
> name string,
> des string
> )
> row format DELIMITED
>
> FIELDS terminated BY ' ' #
>
> STORED AS TEXTFILE;
OK
Time taken: 0.339 seconds
- 查询表结构信息
#查询表结构信息
desc 表名;
hive> desc testdb;
OK
name string
des string
Time taken: 0.286 seconds, Fetched: 2 row(s)
#查询表结构信息
describe formatted 表名;
hive> describe formatted testdb;
OK
# col_name data_type comment
name string
des string
# Detailed Table Information
Database: testdb
Owner: root
CreateTime: Thu Mar 26 18:17:32 CST 2020
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://hadoop01:9000/user/hive/warehouse/testdb.db/testdb
Table Type: MANAGED_TABLE
Table Parameters:
transient_lastDdlTime 1585217852
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim
serialization.format
Time taken: 0.068 seconds, Fetched: 28 row(s)
- 加载插入数据
插入数据hive
insert into 表名 values(数据,"数据");
hive> insert into stu values(2,"fengxun");
Query ID = root_20200402183353_7fb1712a-b564-4a9f-809c-8067014ca5cd
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1585820041973_0002, Tracking URL = http://hadoop01:8088/proxy/application_1585820041973_0002/
Kill Command = /opt/app/hadoop/bin/hadoop job -kill job_1585820041973_0002
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2020-04-02 18:34:29,268 Stage-1 map = 0%, reduce = 0%
2020-04-02 18:34:51,038 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.41 sec
MapReduce Total cumulative CPU time: 1 seconds 410 msec
Ended Job = job_1585820041973_0002
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://hadoop01:9000/user/hive/warehouse/testdb.db/stu/.hive-staging_hive_2020-04-02_18-33-53_876_3214154976853524502-1/-ext-10000
Loading data to table testdb.stu
Table testdb.stu stats: [numFiles=2, numRows=2, totalSize=17, rawDataSize=15]
MapReduce Jobs Launched:
Stage-Stage-1: Map: 1 Cumulative CPU: 1.41 sec HDFS Read: 3648 HDFS Write: 76 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 410 msec
OK
Time taken: 60.276 seconds
hive> select * from stu;
OK
1 zhou
2 fengxun
Time taken: 0.296 seconds, Fetched: 2 row(s)
hive>
加载数据 将本地符合条件的文件,导入到hive
语法:
load data local inpath '本地路径' into table 表名;
例子:
在本地(虚拟机上)创建文件data.txt 文件内容如下:
aaaaa
bbbbb
#创建 testdb 表, 并声明文件分隔符 '\t'
hive> create table testdb(id int, name string) ROW FORMAT DELIMITED FIELDS
TERMINATED BY '\t';
注意:每一行的字段之间要以一个tab键隔开,相当于 \t。创建表时,必须指定每列之间的分隔符(例如 \t),行之间默认以 \n 分隔
然后执行如下命令:
load data local inpath '/local/data.txt' into table testdb;
- 查看数据
select * from 表名;
例子:
select * from testdb;
#复杂查询
语法:SELECT ... WHERE 查询符合where后的条件的记录
例子:
select * from testdb where name = 'mi';
语法:SELECT ... ORDER BY 按照某个字段升序或者降序展示结果
注意:默认是某一字段升序排列
例子:
1. 升序:
select * from testdb2 order by name; #默认是升序
2. 降序:
select * from testdb2 order by name desc;
语法: SELECT... GROUP BY 按照某一字段分组
例子:
select name,count(*) from testdb group by name;
语法: SELECT ... JOIN 多表之间关联查询
有不同类型的联接给出如下:
JOIN不同类型的联接{ LEFT OUTER JOIN, RIGHT OUTER JOIN, FULL OUTER JOIN}
- 重命名表
#重命名表
alter table 旧表名 rename to 新表名 ;
例子:
alter table test rename to test_new ;
- 修改列
#新增列
语法:alter table testdb add columns(列名 数据类型);
例子:
alter table testdb add columns(className string);
#更新列
语法:alter table 表名 change column 旧列名 新列名 数据类型;
例子:
alter table testdb change column className className_new int;
#替换所有列
alter table testdb replace columns(列名1 数据类型, 列名2 数据类型);
例子:
alter table testdb replace columns(did int, dname string);
- 删除表
语法:
drop table 表名;
例子:
drop table testdb;
- 清空表中的数据
语法:
truncate table 表名;
例子:
truncate table testdb;