利用排名函数进行Hive数据由竖到横计算示例

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/BabyFish13/article/details/81251649

1、源数据表结构、样例数据及说明

-- Source (MySQL) table holding mic-queue actor snapshots; per the table
-- comment it is sharded into 256 tables by room_id, and this is shard _0.
-- Each row records one state change of a performer in a room.
CREATE TABLE `karaoke_room_actor_snapshot_0` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键',
  `actor_id` bigint(20) NOT NULL COMMENT '演员 ID',
  `room_id` int(11) NOT NULL COMMENT '直播间 ID',
  `live_status_id` int(11) NOT NULL COMMENT '直播状态 ID',
  `uid` bigint(20) NOT NULL COMMENT '表演者 Uid',
  `music_id` int(11) NOT NULL DEFAULT '-1' COMMENT '伴奏 ID',
  `identifier` varchar(190) NOT NULL DEFAULT '' COMMENT '设备号',
  `score` bigint(20) NOT NULL DEFAULT '0' COMMENT '积分,辅助排序',
  -- state values (from the column comment): 0 = queued for the mic,
  -- 1 = preparing, 2 = performing, -1 = performance finished,
  -- -2 = left the mic voluntarily, -3 = removed by the room owner,
  -- -4 = removed by the system. All negative values mean the session ended.
  `state` tinyint(4) NOT NULL DEFAULT '0' COMMENT '状态:0排麦中,1准备中,2表演中,-1表演结束,-2主动下麦,-3房主强制下麦,-4系统强制下麦',
  -- Microsecond-precision creation time of the snapshot row.
  `created_time` datetime(6) NOT NULL COMMENT '创建时间',
  PRIMARY KEY (`id`),
  -- Secondary indexes: by room/live/user, by actor/time, and by room/live/state.
  KEY `Index_roomId_lsId_uid` (`room_id`,`live_status_id`,`uid`),
  KEY `idx_actor_created` (`actor_id`,`created_time`),
  KEY `idx_room_live_state` (`room_id`,`live_status_id`,`state`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='麦序演员快照,按 room_id 256分表'
id	actor_id	room_id	live_status_id	uid	music_id	identifier	score	state	created_time
1	728	37194071	292	200134	62348	889134744	1531137151093	0	2018-07-09 19:52:31.000000
2	728	37194071	292	200134	62348	889134744	1531137151093	1	2018-07-09 19:52:33.000000
3	728	37194071	292	200134	62348	889134744	1531137151093	-3	2018-07-09 19:52:52.000000
4	729	37194071	292	200134	26081	889134744	1531137204230	0	2018-07-09 19:53:24.000000
5	729	37194071	292	200134	26081	889134744	1531137204230	1	2018-07-09 19:53:26.000000
6	729	37194071	292	200134	26081	889134744	1531137204230	-3	2018-07-09 19:53:44.000000
7	730	37194071	292	200134	26081	889134744	1531137395308	0	2018-07-09 19:56:35.000000
8	730	37194071	292	200134	26081	889134744	1531137395308	1	2018-07-09 19:56:38.000000
9	730	37194071	292	200134	26081	889134744	1531137395308	2	2018-07-09 19:56:45.000000
10	731	37194071	292	200119	84874	BBA67879-E1FB-4B0E-82E6-3841D8301EB2	1531137444615	0	2018-07-09 19:57:24.000000
11	732	37194071	292	330451222	114810	379085366	1531137497910	0	2018-07-09 19:58:17.000000
12	732	37194071	292	330451222	114810	379085366	1531137497910	-2	2018-07-09 19:58:38.000000
13	730	37194071	292	200134	26081	889134744	1531137395308	-3	2018-07-09 19:59:52.000000

说明:
用户的排麦、准备、表演、退出等状态变化各有一条记录;一个用户可以多次进入同一个房间;表中没有字段标记哪几条记录(进入、退出)属于同一次上麦,只能根据时间先后来判断;现在要求把这张竖表整理成横表(每次上麦一行),计算出用户的表演时间、等待时间等。
2、问题分析及思路说明
从数据上观察,用户可以没有准备中及表演中的记录,但一定会有排麦中(state=0)及退出(state<0)的记录。因此可以分别对排麦记录和退出记录按时间排名,用相同的名次把二者配对成同一次上麦,然后根据这次上麦的起止时间范围锁定该次的其他活动记录(准备中、表演中)。
所以,先进行用户数据排名,之后进行其他数据的相关计算。

3、具体sql语句
为了更清晰地梳理思路,主要采用多张临时表分步处理的方式。

-- Build one row per mic session: the N-th "queued" snapshot (state = 0) of a
-- user in a room is paired with the N-th exit snapshot (state < 0) of the
-- same user in the same room.
-- Ranks are computed per (room_id, uid) over the whole date range, while the
-- join additionally requires both rows to share the same pt_day.
-- A start without a matching exit is kept with mic_over_time = NULL.
DROP TABLE IF EXISTS xxxlv_mic_range;

CREATE TABLE xxxlv_mic_range AS
WITH tab_mic_over AS (
    -- Exit events (any negative state), ranked by time per room and user.
    SELECT
        pt_day,
        room_id,
        uid,
        state,
        created_time,
        ROW_NUMBER() OVER (PARTITION BY room_id, uid ORDER BY created_time ASC) AS mic_over_rn
    FROM oss_all_karaoke_room_actor_snapshot
    WHERE pt_day BETWEEN '2018-07-18' AND '2018-07-26'
      AND state < 0
),
tab_mic_start AS (
    -- Queue-entry events (state = 0), ranked by time per room and user.
    SELECT
        pt_day,
        room_id,
        uid,
        state,
        created_time,
        ROW_NUMBER() OVER (PARTITION BY room_id, uid ORDER BY created_time ASC) AS mic_start_rn
    FROM oss_all_karaoke_room_actor_snapshot
    WHERE pt_day BETWEEN '2018-07-18' AND '2018-07-26'
      AND state = 0
)
SELECT
    st.pt_day,
    st.room_id,
    st.uid,
    st.created_time AS mic_start_time,
    ov.created_time AS mic_over_time,
    st.mic_start_rn
FROM tab_mic_start st
LEFT JOIN tab_mic_over ov
    ON  st.pt_day       = ov.pt_day
    AND st.room_id      = ov.room_id
    AND st.uid          = ov.uid
    AND st.mic_start_rn = ov.mic_over_rn;

-- Attach every "preparing" snapshot (state = 1) to the mic session whose
-- [mic_start_time, mic_over_time] window contains it. Sessions with a NULL
-- mic_over_time never match, because the BETWEEN predicate is then not true.
DROP TABLE IF EXISTS xxxlv_mic_ready;

CREATE TABLE xxxlv_mic_ready AS
SELECT
    snap.pt_day,
    snap.room_id,
    snap.uid,
    snap.state,
    snap.created_time AS ready_time,
    rng.mic_start_time,
    rng.mic_over_time
FROM oss_all_karaoke_room_actor_snapshot snap
INNER JOIN xxxlv_mic_range rng
    ON  snap.pt_day  = rng.pt_day
    AND snap.room_id = rng.room_id
    AND snap.uid     = rng.uid
WHERE snap.pt_day BETWEEN '2018-07-18' AND '2018-07-26'
  AND snap.state = 1
  AND snap.created_time BETWEEN rng.mic_start_time AND rng.mic_over_time;

-- Attach every "performing" snapshot (state = 2) to the mic session whose
-- [mic_start_time, mic_over_time] window contains it. Sessions with a NULL
-- mic_over_time never match, because the BETWEEN predicate is then not true.
DROP TABLE IF EXISTS xxxlv_mic_runing;

CREATE TABLE xxxlv_mic_runing AS
SELECT
    snap.pt_day,
    snap.room_id,
    snap.uid,
    snap.state,
    snap.created_time AS runing_time,
    rng.mic_start_time,
    rng.mic_over_time
FROM oss_all_karaoke_room_actor_snapshot snap
INNER JOIN xxxlv_mic_range rng
    ON  snap.pt_day  = rng.pt_day
    AND snap.room_id = rng.room_id
    AND snap.uid     = rng.uid
WHERE snap.pt_day BETWEEN '2018-07-18' AND '2018-07-26'
  AND snap.state = 2
  AND snap.created_time BETWEEN rng.mic_start_time AND rng.mic_over_time;

-- Pivot the vertical snapshots into one horizontal row per mic session:
-- start, ready, running and over timestamps side by side.
-- ready_time / runing_time stay NULL when the session has no such snapshot.
DROP TABLE IF EXISTS xxxlv_mic_basic;

CREATE TABLE xxxlv_mic_basic AS
SELECT
    rng.pt_day,
    rng.room_id,
    rng.uid,
    rng.mic_start_time,
    rdy.ready_time,
    run.runing_time,
    rng.mic_over_time
FROM xxxlv_mic_range rng
-- A session is identified by (pt_day, room_id, uid, mic_start_time, mic_over_time).
LEFT JOIN xxxlv_mic_ready rdy
    ON  rng.pt_day         = rdy.pt_day
    AND rng.room_id        = rdy.room_id
    AND rng.uid            = rdy.uid
    AND rng.mic_start_time = rdy.mic_start_time
    AND rng.mic_over_time  = rdy.mic_over_time
LEFT JOIN xxxlv_mic_runing run
    ON  rng.pt_day         = run.pt_day
    AND rng.room_id        = run.room_id
    AND rng.uid            = run.uid
    AND rng.mic_start_time = run.mic_start_time
    AND rng.mic_over_time  = run.mic_over_time
;

-- Compute per-session durations (in minutes) and attach the user's nickname.
--   act_times  = mic_over_time - runing_time; 0 when the user never reached
--                the running state (runing_time IS NULL).
--   wait_times = runing_time - mic_start_time; falls back to
--                mic_over_time - mic_start_time when runing_time IS NULL.
-- Both are NULL when the session has no mic_over_time.
DROP TABLE IF EXISTS xxxlv_mic_timelog;

CREATE TABLE xxxlv_mic_timelog AS
SELECT
    a1.pt_day,
    a1.uid,
    a3.nickname,
    a1.room_id,
    a1.runing_time,
    a1.mic_over_time,
    (unix_timestamp(a1.mic_over_time)
        - unix_timestamp(coalesce(a1.runing_time, a1.mic_over_time))) / 60 AS act_times,
    (unix_timestamp(coalesce(a1.runing_time, a1.mic_over_time))
        - unix_timestamp(a1.mic_start_time)) / 60 AS wait_times,
    a1.mic_start_time
FROM xxxlv_mic_basic a1
-- xxxlv_user_info is used only to restrict the result to the rooms it lists;
-- no column is read from it. Joining against the DISTINCT room_id set keeps
-- that filter while guaranteeing a session row is never multiplied if a
-- room_id appears more than once in xxxlv_user_info.
INNER JOIN (
    SELECT DISTINCT room_id
    FROM xxxlv_user_info
) a2
    ON a1.room_id = a2.room_id
-- Nickname is taken from the profile snapshot of a single fixed day.
LEFT JOIN (
    SELECT uid, nickname
    FROM oss_bi_all_user_profile
    WHERE pt_day = '2018-07-22'
) a3
    ON a1.uid = a3.uid;


-- Sum the gifts a performer received while actually performing, per session.
-- A gift belongs to a session when its created_time lies within
-- [runing_time, mic_over_time] of a session of the receiver in the same room
-- on the same pt_day.
--   rechargeable_cnt : gift_count where source = 1
--   free_cnt         : gift_count where source = 2
--   all_cnt          : gift_count regardless of source
-- NOTE(review): the column names suggest source 1 = paid gifts and
-- source 2 = free gifts; confirm against the gift record schema.
DROP TABLE IF EXISTS xxxlv_gift_record_info21;

CREATE TABLE xxxlv_gift_record_info21 AS
SELECT
    g.pt_day,
    g.receive_uid,
    g.room_id,
    s.mic_start_time,
    s.mic_over_time,
    SUM(CASE WHEN g.source = 1 THEN g.gift_count ELSE 0 END) AS rechargeable_cnt,
    SUM(CASE WHEN g.source = 2 THEN g.gift_count ELSE 0 END) AS free_cnt,
    SUM(g.gift_count) AS all_cnt
FROM oss_all_karaoke_gift_record g
-- The original spelled this as LEFT JOIN, but its WHERE clause filtered on
-- the session columns and therefore discarded every unmatched gift row. It
-- always behaved as an inner join; say so explicitly.
INNER JOIN xxxlv_mic_timelog s
    ON  g.pt_day      = s.pt_day
    AND g.receive_uid = s.uid
    AND g.room_id     = s.room_id
WHERE g.pt_day BETWEEN '2018-07-18' AND '2018-07-26'
  -- Only sessions that reached the running state can receive gifts.
  AND s.runing_time IS NOT NULL
  AND g.created_time BETWEEN s.runing_time AND s.mic_over_time
GROUP BY
    g.pt_day,
    g.receive_uid,
    g.room_id,
    s.mic_start_time,
    s.mic_over_time
;
-------------------------------------------------------------------------------------------
-- Result 2.1: one row per mic session with its durations and gift counts.
-- Gift counts are NULL for sessions that have no row in
-- xxxlv_gift_record_info21.
DROP TABLE IF EXISTS xxxlv_user_result21;

CREATE TABLE xxxlv_user_result21 AS
SELECT
    sess.pt_day,
    sess.uid,
    sess.nickname,
    sess.room_id,
    sess.runing_time,
    sess.mic_over_time,
    sess.act_times,
    sess.wait_times,
    gift.rechargeable_cnt,
    gift.free_cnt,
    gift.all_cnt
FROM xxxlv_mic_timelog sess
LEFT JOIN xxxlv_gift_record_info21 gift
    ON  sess.pt_day         = gift.pt_day
    AND sess.uid            = gift.receive_uid
    AND sess.room_id        = gift.room_id
    AND sess.mic_start_time = gift.mic_start_time
    AND sess.mic_over_time  = gift.mic_over_time
;
-------------------------------------------------------------------------------------------
-- Result 2.2 is the roll-up of result 2.1: one row per day, user and room.
--   mic_cnt : number of mic sessions (all rows)
--   act_cnt : number of sessions that reached the running state
DROP TABLE IF EXISTS xxxlv_user_result22;

CREATE TABLE xxxlv_user_result22 AS
SELECT
    r.pt_day,
    r.uid,
    r.nickname,
    r.room_id,
    SUM(r.act_times)  AS act_times,
    SUM(r.wait_times) AS wait_times,
    COUNT(*)          AS mic_cnt,
    -- COUNT skips NULLs, so only rows with a runing_time are counted.
    COUNT(CASE WHEN r.runing_time IS NOT NULL THEN r.uid END) AS act_cnt,
    SUM(r.rechargeable_cnt) AS rechargeable_cnt,
    SUM(r.free_cnt)         AS free_cnt,
    SUM(r.all_cnt)          AS all_cnt
FROM xxxlv_user_result21 r
GROUP BY
    r.pt_day,
    r.uid,
    r.nickname,
    r.room_id;

猜你喜欢

转载自blog.csdn.net/BabyFish13/article/details/81251649