Hive支持对表的存储文件进行压缩:压缩可以减少存储空间占用,但压缩和解压会额外消耗CPU性能
建表语句
-- Example table stored as a SequenceFile; fields in each row are separated by commas.
CREATE TABLE IF NOT EXISTS tb02 (
    name STRING,
    age  INT,
    male VARCHAR(2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ","
STORED AS SEQUENCEFILE;
hive支持的压缩格式:
开启压缩
hive > set ---> 查看所有参数
hive > set hive.exec.compress.intermediate=true -- 开启中间结果压缩
> set mapred.map.output.compression.codec = CodecName -- CodecName 为压缩编解码器的完整类名
#结果压缩
> set hive.exec.compress.output=true
> set mapred.output.compression.type = BLOCK/RECORD
注意:SequenceFile、RCFile、ORC格式的表不能直接从本地文件导入数据,数据要先导入到TextFile格式的表中,然后再从TextFile表中用insert导入到SequenceFile、RCFile表中。也就是
-- Copy every row of raw into raw_sequence, replacing the table's existing contents.
INSERT OVERWRITE TABLE raw_sequence
SELECT *
FROM raw;
hive导入hdfs上的压缩文件
直接导入压缩文件(LOCAL 表示文件位于本地文件系统;若文件已在HDFS上则去掉 LOCAL):
-- Single-column table in the default storage format: fields split on tab, rows on newline.
CREATE TABLE raw (
    line STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

-- Load the .gz log file from the local filesystem into raw without unpacking it first.
LOAD DATA LOCAL INPATH '/tmp/weblogs/20090603-access.log.gz'
INTO TABLE raw;
gzip压缩的文本文件无法被切分成多个分片并行处理,为了效率可以先导入文本表,再转存为SequenceFile表:
-- Staging table: receives the .gz log file exactly as it is on disk.
CREATE TABLE raw (
    line STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

-- Target table: same single column, stored as a SequenceFile.
CREATE TABLE raw_sequence (
    line STRING
)
STORED AS SEQUENCEFILE;

-- Step 1: load the compressed log from the local filesystem into the staging table.
LOAD DATA LOCAL INPATH '/tmp/weblogs/20090603-access.log.gz'
INTO TABLE raw;

-- Step 2: enable compressed query output with block-level SequenceFile compression.
-- These settings must be issued before the INSERT below so that it picks them up.
SET hive.exec.compress.output=true;
SET io.seqfile.compression.type=BLOCK;

-- Step 3: rewrite the staged rows into the SequenceFile table.
INSERT OVERWRITE TABLE raw_sequence
SELECT *
FROM raw;