HDFS storage test

hdfs storage test comparison

Data Format
Storage format TEXTFILE SequenceFile RCFile Avro Parquet ORC
Data size 65G 67G 61.5G 68.2G 28.9G 8.3G
Hive load time 286.319 seconds 118.45 seconds 106.212 seconds 163.988 seconds 136.663 seconds 130.186 seconds
-- Staging table for purchase events, stored as plain text.
-- One record per line; fields are separated by a tab character.
-- The statement is a no-op when the table already exists.
CREATE TABLE IF NOT EXISTS prod_purchased_txt (
    uid                      STRING,
    event_time               BIGINT,
    touch_point_id           STRING,
    et_city                  STRING,
    et_city_tier             STRING,
    et_has_gifts             BOOLEAN,
    et_has_points            BOOLEAN,
    et_order_discount        FLOAT,
    et_order_list_value      FLOAT,
    et_order_quantity        FLOAT,
    et_order_type            STRING,
    et_order_value           FLOAT,
    et_pay_mode              STRING,
    -- The *_list columns below hold one array value per row.
    et_prod_brand_list       ARRAY<STRING>,
    et_prod_cate_name_list   ARRAY<STRING>,
    et_prod_id_list          ARRAY<STRING>,
    et_prod_list_price_list  ARRAY<FLOAT>,
    et_prod_name_list        ARRAY<STRING>,
    et_prod_quantity_list    ARRAY<FLOAT>,
    et_province              STRING,
    et_purchase_date_type    STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
-- ORC-backed copy of the purchase-event table, partitioned by process_date.
-- Column list matches prod_purchased_txt so rows can be copied across with
-- INSERT ... SELECT. The statement is a no-op when the table already exists.
-- NOTE(review): the ROW FORMAT DELIMITED clause is kept from the original;
-- it is presumably not used by the ORC file format itself — confirm before
-- removing it.
CREATE TABLE IF NOT EXISTS prod_purchased_orc (
    uid                      string,
    event_time               bigint,
    touch_point_id           string,
    et_city                  string,
    et_city_tier             string,
    et_has_gifts             boolean,
    et_has_points            boolean,
    et_order_discount        float,
    et_order_list_value      float,
    et_order_quantity        float,
    et_order_type            string,
    et_order_value           float,
    et_pay_mode              string,
    et_prod_brand_list       array<string>,
    et_prod_cate_name_list   array<string>,
    et_prod_id_list          array<string>,
    et_prod_list_price_list  array<float>,
    et_prod_name_list        array<string>,
    et_prod_quantity_list    array<float>,
    et_province              string,
    et_purchase_date_type    string
)
PARTITIONED BY (process_date string)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n'
STORED AS ORC;

======================================
STORED AS SequenceFile;
STORED AS RCfile ;
STORED AS Avro ;
STORED AS parquetfile ;
STORED AS ORC ;

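Only the TEXTFILE table supports loading data directly with LOAD DATA; to populate tables in the other formats, insert the data from the text table with INSERT ... SELECT.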

-- Step 1: load the local file into the TEXTFILE staging table.
LOAD DATA LOCAL INPATH '/home/hadoop/prod_purchased.txt'
INTO TABLE prod_purchased_txt;

-- Step 2: copy every staged row into a single static partition of the
-- target table.
-- NOTE(review): prod_purchased_sq is not created in the visible part of this
-- file; presumably it is the SequenceFile variant of the table — confirm.
INSERT INTO TABLE prod_purchased_sq
PARTITION (process_date = '2019-06-01')
SELECT *
FROM prod_purchased_txt;

Guess you like

Origin blog.csdn.net/lunhuasxk/article/details/90767147