官网教程地址:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML
创建和删除数据库
新建一个名为test的数据库:
-- Create the `test` database; IF NOT EXISTS keeps the statement re-runnable
-- when the database is already present.
CREATE DATABASE IF NOT EXISTS test;
执行完成之后刷新连接,可以看到创建成功
删除数据库:
-- Remove the `test` database if it is present. RESTRICT is Hive's default
-- mode: the drop fails while the database still contains tables
-- (CASCADE would drop them as well).
DROP DATABASE IF EXISTS test RESTRICT;
创建表
-- Tab-delimited, newline-terminated text table used by the examples in this file.
CREATE TABLE IF NOT EXISTS test.people (
    id          INT,
    name        STRING,
    destination STRING
)
COMMENT 'This is a test table'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
插入数据
-- Seed two sample rows. The column list pins each value to its column so the
-- statement does not depend on the table's column order.
INSERT INTO TABLE test.people (id, name, destination)
VALUES
    (1, 'zhouzhou', 'first peopele'),
    (2, 'Sanra', 'second people');
修改数据
Hive 的非事务表不支持 UPDATE,执行会报错;可以改用 INSERT OVERWRITE,用新的查询结果整体覆盖原来的数据:
-- Rewrite every row of test.people in place: id and name are carried over
-- unchanged and destination is set to the constant "yoyo".
INSERT OVERWRITE TABLE test.people
SELECT
    id,
    name,
    "yoyo" AS destination
FROM test.people;
执行效果如下:
数据迁移
从数据库中导出数据到txt文件**(在linux环境执行)**
# Run the query with the Hive CLI and append (>>) its output to people.txt.
# NOTE(review): `province` is not among the columns of test.people as created
# earlier in this file (id, name, destination) -- confirm the filter column.
hive -e 'SELECT * FROM test.people WHERE province=2' \
    >> /web/net/people.txt
将txt文件导入数据库
-- Load a file from the local filesystem (LOCAL) into test.people; without
-- OVERWRITE the rows are added to the existing data.
LOAD DATA LOCAL INPATH '/web/net/people.txt'
INTO TABLE test.people;
这里需要注意的是,接收 txt 文件的目标表必须指定与文件一致的字段分隔符,例如:
-- Target table for tab-separated text imports.
-- Database and table are quoted separately: a single back-quoted `test.beijing`
-- is read as one identifier containing a dot, which newer Hive versions reject.
CREATE TABLE `test`.`beijing` (
    `id`          STRING    COMMENT '编号',
    `name`        STRING    COMMENT '名字',
    `insert_time` TIMESTAMP COMMENT '插入时间'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';
迁移到新表后,需要设置空值(NULL)的序列化格式:
-- Treat the literal text NULL in the underlying files as SQL NULL (and write
-- NULLs back out the same way).
ALTER TABLE test.people
SET SERDEPROPERTIES ('serialization.null.format' = 'NULL');
如果设置后仍然无效,可以在查询时把空值替换为默认值:
-- Copy test.people into the 2019-09 partition of bdrs.client_info, replacing a
-- NULL name with -1.
-- NOTE(review): -1 is a numeric literal while name is declared STRING on
-- test.people -- confirm the implicit conversion is what is wanted.
INSERT OVERWRITE TABLE bdrs.client_info PARTITION (insert_date = '2019-09')
SELECT
    id,
    if(name IS NULL, -1, name) AS name,
    insert_time
FROM test.people;
-- Copy test.people into the 2019-09 partition of bdrs.client_info, substituting
-- -1 for a missing name.
-- The NULL test must be IS NULL: `name = NULL` evaluates to NULL for every row,
-- so the IF would never take its first branch and no value would be replaced.
INSERT OVERWRITE TABLE bdrs.client_info PARTITION (insert_date = '2019-09')
SELECT
    id,
    if(name IS NULL, -1, name) AS name,
    insert_time
FROM test.people;
删除数据
删除分区
-- Drop one partition of bdrs.client_info; IF EXISTS makes it a no-op when the
-- partition is absent.
-- NOTE(review): the partition key here is `pdate`, while the INSERT examples
-- in this file write to PARTITION (insert_date = ...) -- confirm which key the
-- table really uses.
ALTER TABLE bdrs.client_info
DROP IF EXISTS PARTITION (pdate = '2019-08');