1、创建一张外部分区表(分区需要另外添加)
-- External, partitioned table of click records stored as '|'-delimited text.
-- The statement has no LOCATION clause and creates no partitions: every
-- (`date`, `hour`) partition must be attached afterwards with
-- ALTER TABLE ... ADD PARTITION ... LOCATION.
-- `timestamp` and `date` are backtick-quoted because they are Hive keywords.
CREATE EXTERNAL TABLE `ext_base_click`(
    `reqid` string,
    `adslotid` string,
    `ip` string,
    `timestamp` string,
    `adgroupid` string,
    `lbs` string)
PARTITIONED BY (
    `date` string,
    `hour` int)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'field.delim'='|',
    'line.delim'='\n',
    'serialization.format'='|')
STORED AS INPUTFORMAT
    'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
2、创建一张非分区外部表(需在 LOCATION 中写好数据存放位置)
-- External, non-partitioned table stored as '|'-delimited text.
-- The data directory is fixed by the LOCATION clause at creation time.
CREATE EXTERNAL TABLE `b_base_adx`(
    `id` string,
    `enname` string,
    `zhname` string,
    `description` string,
    `supportdeal` string)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'field.delim'='|',
    'line.delim'='\n',
    'serialization.format'='|')
STORED AS INPUTFORMAT
    'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
    'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
    'hdfs://tercel/user/mapred2/hive_basetable/b_base_adx';
3、表字段的修改(注意锁表问题、ORC文件表字段的修改、向量化查询)
-- Add a new column (app_id) to an existing table.
ALTER TABLE dsp.int_optimize_bidrequest
    ADD COLUMNS (app_id STRING);

-- Rename a column. Template: col_old_name, col_new_name and column_name are
-- placeholders; the AFTER clause also positions the column behind column_name.
ALTER TABLE ext_preprocess_click
    CHANGE col_old_name col_new_name STRING AFTER column_name;

-- Replace the table's entire column list with the four columns given here.
ALTER TABLE ext_base_conversion
    REPLACE COLUMNS (
        reqid STRING,
        adslotid STRING,
        adx STRING,
        deviceid STRING
    );
-- Inspect, then release, the locks held on a table.
-- your_table is a placeholder for the real table name.
SHOW LOCKS your_table;
UNLOCK TABLE your_table;
4、hive常用语句
-- Load a data file into an (external) table.
-- INPATH is used without LOCAL, so the path is resolved on the cluster
-- filesystem rather than on the client machine; without OVERWRITE the
-- table's existing data is kept.
LOAD DATA INPATH '/user/hebin/data/media/deviceid_10w/deviceid_10w.txt'
INTO TABLE zhtsh_tmp.deviceid_10w;
-- Test whether a string consists solely of digits.
-- The literal '123456' stands in for the value under test; the predicate does
-- not depend on any column, so it is the same for every row of the table.
SELECT 1
FROM base_media_baidu
WHERE '123456' RLIKE '^\\d+$';
-- Attach a new partition to an external table and point it at its data directory.
-- `date` is a reserved word in Hive, so it is backtick-quoted here.
-- NOTE(review): `$day` in the path looks like a placeholder that a wrapping
-- script is expected to substitute before the statement runs; Hive's own
-- variable substitution uses the ${...} form. Confirm how it is filled in.
ALTER TABLE media_stats
    ADD PARTITION (`date` = "2018-02-02", hour = 5)
    LOCATION 'hdfs://tercel/user/userplatform/DAILY/$day/media-stats';
-- Drop one partition of a table; IF EXISTS makes the statement a no-op
-- (instead of an error) when the partition is absent.
-- `date` is a reserved word in Hive, so it is backtick-quoted here.
-- The example year was "1018", which reads as a typo for 2018; with IF EXISTS
-- such a typo would silently drop nothing.
ALTER TABLE dsp.ext_preprocess_bidrequest
    DROP IF EXISTS PARTITION (`date` = "2018-01-05", hour = 5);
-- Register a custom UDF for the current session:
--   1. put the jar that contains the implementation on the session classpath;
--   2. bind the function name md5 to the class com.Md5.
-- NOTE(review): the jar URI contains a double slash after the authority
-- ("tercel//user"); it is reproduced unchanged here. Confirm it is intended.
-- NOTE(review): newer Hive releases ship a built-in md5(); verify that
-- registering a temporary function under the same name is accepted on the
-- target version.
ADD JAR hdfs://tercel//user/mapred2/myudf/hive/hiveudf.jar;
CREATE TEMPORARY FUNCTION md5 AS 'com.Md5';
5、分割字符串,一行转多行(explode)
-- Split adslotallowedsize on '_' and emit one output row per piece:
-- split() produces an array, explode() turns each array element into a row,
-- and LATERAL VIEW joins those rows back to the source row.
-- `date` is a reserved word in Hive, so it is backtick-quoted here.
SELECT adslotallowedsize_
FROM dsp_media.b_report_flow_daily
LATERAL VIEW explode(split(adslotallowedsize, '_')) b AS adslotallowedsize_
WHERE `date` = '2018-03-07';
6、select除了某些字段之外的剩余所有字段
-- Select every column except a few.
-- With quoted identifiers switched off, a backticked item in the SELECT list
-- is interpreted as a regular expression over column names. The pattern below
-- matches every column name except exactly name, id and pwd.
-- The SET stays in effect for the remainder of the session.
SET hive.support.quoted.identifiers=None;

SELECT
    `(name|id|pwd)?+.+`
FROM tableName;
7、不同的adx中的曝光top3的机型
-- Top 3 device models (brandmodelid) by impression count within each adx,
-- for the 2017-07-03 / hour 4 partition.
-- Output columns: adx, brandmodelid, c (impression count), ord (rank, 1..3).
SELECT
    adx,
    brandmodelid,
    c,
    ord
FROM (
    SELECT
        adx,
        brandmodelid,
        c,
        -- adx is constant inside its own partition, so it adds nothing to the
        -- ordering; brandmodelid is a tie-breaker so that equal counts are
        -- ranked the same way on every run.
        row_number() OVER (
            PARTITION BY adx
            ORDER BY c DESC, brandmodelid
        ) AS ord
    FROM (
        -- impressions per (adx, brandmodelid); `date` is a reserved word in
        -- Hive, so it is backtick-quoted
        SELECT
            adx,
            brandmodelid,
            count(1) AS c
        FROM dsp.ext_base_impression
        WHERE `date` = '2017-07-03'
          AND hour = 4
        GROUP BY adx, brandmodelid
    ) t1
) t2
WHERE ord <= 3;
8、设备信息的组内合计(用于计算组内占比)
-- Per (source, sdk_version) device-info row count together with the total for
-- the whole source, for day 20180426. sdk_version is reduced to the part
-- before the first '|'. The within-group share can be derived from the result
-- as device_info_count / group_device_info_count.
SELECT
    a.source AS source,
    a.sdk_version AS sdk_version,
    a.device_info_count AS device_info_count,
    sum(a.device_info_count) OVER (PARTITION BY a.source) AS group_device_info_count
FROM (
    SELECT
        source,
        split(sdk_version, '\\|')[0] AS sdk_version,
        count(1) AS device_info_count
    FROM stg.stg_dev_device_info_d
    WHERE day = 20180426
    GROUP BY source, split(sdk_version, '\\|')[0]
) AS a
-- The sort belongs on the outermost query: row order produced inside a
-- derived table is not guaranteed to survive the window step above it.
ORDER BY cast(source AS int), sdk_version;