-- Hive 业务数据逻辑：非常用函数

-- device_report // Shuangke data-analysis requirement // device table
-- temp_table1: rows of jt_sk_ods.device_report joined to the place-group and
-- device-model lookups, with text dimensions normalised so that a SQL NULL or the
-- text 'null' (any case) both become the literal 'NULL'.
with temp_table1 as (
    select rpt.report_date,
           -- report date rendered as a yyyyMMdd key
           date_format(rpt.report_date, 'yyyyMMdd') as dashboard_day,
           rpt.place_code,
           -- region: a missing group_name falls back to '其他', then the
           -- substring '运营中心' is removed from the name
           regexp_replace(nvl(grp.group_name, '其他'), '运营中心', '') as group_name,
           -- level-1 scene
           (case
                when rpt.place_type_parent_name is null
                  or lower(rpt.place_type_parent_name) = 'null' then 'NULL'
                else rpt.place_type_parent_name
            end) as place_type_parent_name,
           -- level-2 scene
           (case
                when rpt.place_type_child_name is null
                  or lower(rpt.place_type_child_name) = 'null' then 'NULL'
                else rpt.place_type_child_name
            end) as place_type_child_name,
           rpt.product_model,
           -- device series
           (case
                when mdl.series_code is null
                  or lower(mdl.series_code) = 'null' then 'NULL'
                else mdl.series_code
            end) as series_code,
           rpt.device_code,
           -- amount net of refunds, NULLs counted as 0
           nvl(rpt.total_amount, 0) - nvl(rpt.refund_amount, 0) as total_amount
    from jt_sk_ods.device_report rpt
    left join jt_sk_ods.place_groups grp
           on grp.place_code = rpt.place_code
    -- device model -> device series
    left join jt_sk_ods.device_model mdl
           on mdl.name = rpt.product_model
    where rpt.contract_type_code in (1, 3, 5)
      and rpt.report_date is not null
      and rpt.start_service_time is not null
      and lower(rpt.start_service_time) <> 'null'
      -- keep only report days on or after the service start day
      and date_format(from_unixtime(unix_timestamp(rpt.start_service_time, 'yyyy-MM-dd'), 'yyyy-MM-dd'), 'yyyyMMdd')
          <= date_format(rpt.report_date, 'yyyyMMdd')
),
temp_table2 as (
    -- Daily aggregate of temp_table1 per
    -- (day, group, level-1 scene, level-2 scene, device series).
    -- max_day / min_day use an empty window, so every output row carries the
    -- latest and earliest dashboard_day found across all aggregated rows.
    select daily.dashboard_day,
           max(daily.dashboard_day) over () as max_day,
           min(daily.dashboard_day) over () as min_day,
           daily.group_name,
           daily.place_type_parent_name,
           daily.place_type_child_name,
           daily.series_code,
           count(distinct daily.device_code) as device_cnt,
           sum(daily.total_amount) as total_amount
    from temp_table1 daily
    group by
        daily.dashboard_day,
        daily.group_name,
        daily.place_type_parent_name,
        daily.place_type_child_name,
        daily.series_code
)

-- Final result: one row per (day, group, level-1 scene, level-2 scene, device series)
-- for every daily period of dim.kuka_period between the earliest and latest
-- dashboard_day in temp_table2, crossed with every dimension combination seen there.
-- device_cnt / total_amount come from temp_table2 (0 when the combination has no row
-- for that day). device_count_7nullamount is the number of distinct devices that have
-- a temp_table1 row for the same combination on any day of the trailing window.
-- NOTE(review): the window is day-7 .. day inclusive, i.e. 8 calendar days, while the
-- column name says 7 - confirm the intended window length.
-- NOTE(review): the trailing count does not look at total_amount at all - confirm this
-- is what "7nullamount" is meant to measure.
select spine.dashboard_day,
       -- yyyyMMdd key rendered back as yyyy-MM-dd
       from_unixtime(unix_timestamp(spine.dashboard_day, 'yyyyMMdd'), 'yyyy-MM-dd') as report_date,
       spine.group_name,
       spine.group_name as group_one_floor1,
       spine.place_type_parent_name,
       spine.place_type_parent_name as place_one_floor1,
       spine.place_type_parent_name as place_one_floor2,
       spine.place_type_child_name,
       spine.place_type_child_name as place_two_floor1,
       spine.place_type_child_name as place_two_floor2,
       spine.series_code,
       spine.series_code as model_one_floor1,
       spine.series_code as model_one_floor2,
       spine.device_cnt,
       spine.total_amount,
       count(distinct dev.device_code) as device_count_7nullamount
from (
    select filled.dashboard_day,
           filled.group_name,
           filled.place_type_parent_name,
           filled.place_type_child_name,
           filled.series_code,
           nvl(filled.device_cnt, 0) as device_cnt,
           nvl(filled.total_amount, 0) as total_amount,
           window_day.nearly_days_value
    from (
        select calendar.dashboard_day,
               calendar.nearly_days,
               dims.group_name,
               dims.place_type_parent_name,
               dims.place_type_child_name,
               dims.series_code,
               agg.device_cnt,
               agg.total_amount
        from (
            -- For each anchor day, collect the set of daily period codes that fall
            -- inside its trailing window (kept as an array, exploded further out).
            select anchor.period_code as dashboard_day,
                   collect_set(trailing.period_code) as nearly_days
            from (
                -- Daily periods bounded by the min/max day of the aggregated data.
                -- The bounds subquery yields at most one row, so the cross join only
                -- attaches the two bounds to every period row.
                select p.period_code
                from dim.kuka_period p
                cross join (select max_day, min_day from temp_table2 limit 1) bounds
                where p.period_type = 'D'
                  and p.period_code >= bounds.min_day
                  and p.period_code <= bounds.max_day
            ) anchor
            -- Range self-join written as cross join + where: the window predicates
            -- are non-equi conditions on both sides.
            cross join dim.kuka_period trailing
            where trailing.period_type = 'D'
              and date_format(date_add(from_unixtime(unix_timestamp(anchor.period_code, 'yyyyMMdd'), 'yyyy-MM-dd'), -7), 'yyyyMMdd') <= trailing.period_code
              and anchor.period_code >= trailing.period_code
            group by anchor.period_code
        ) calendar
        -- Deliberate cartesian product: every day x every dimension combination.
        cross join (
            select distinct
                   group_name,
                   place_type_parent_name,
                   place_type_child_name,
                   series_code
            from temp_table2
        ) dims
        left join temp_table2 agg
               on calendar.dashboard_day = agg.dashboard_day
              and dims.group_name = agg.group_name
              and dims.place_type_parent_name = agg.place_type_parent_name
              and dims.place_type_child_name = agg.place_type_child_name
              and dims.series_code = agg.series_code
    ) filled
    -- One output row per day of the trailing window, e.g. (d, [a, b, c]) becomes
    -- (d, a), (d, b), (d, c).
    lateral view explode(filled.nearly_days) window_day as nearly_days_value
) spine
left join temp_table1 dev
       on spine.group_name = dev.group_name
      and spine.place_type_parent_name = dev.place_type_parent_name
      and spine.place_type_child_name = dev.place_type_child_name
      and spine.series_code = dev.series_code
      and spine.nearly_days_value = dev.dashboard_day
group by
    spine.dashboard_day,
    spine.group_name,
    spine.place_type_parent_name,
    spine.place_type_child_name,
    spine.series_code,
    spine.device_cnt,
    spine.total_amount
-- 参考资料：
-- concat 函数：https://blog.csdn.net/shuicaohui5/article/details/3129489
-- 行转列函数：https://blog.csdn.net/youziguo/article/details/6837368
-- hive业务数据逻辑-非常用函数
--
-- 猜你喜欢