Running the experience-value calculation as a Hive batch job with Python

1. The script
The script has three parts: calculation of the historical experience value, calculation of the daily experience value, and aggregation of the experience values with conversion to grades.
/Users/nisj/PycharmProjects/BiDataProc/love/HiveRunData-wuyan-score.py
# -*- coding: utf-8 -*-
import os
import datetime
import warnings

warnings.filterwarnings("ignore")


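# Return a list of date strings (YYYY-MM-DD) from beginDate to endDate, inclusive.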
def dateRange(beginDate, endDate):
    dates = []
    dt = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
    date = beginDate[:]
    while date <= endDate:
        dates.append(date)
        dt = dt + datetime.timedelta(1)
        date = dt.strftime("%Y-%m-%d")
    return dates


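# Historical experience value per user: 0.7 * total big-fans intimacy of the user's room (as creator)
# plus 4 * accumulated non-negative coin records up to runDay, restricted to valid users (state=0)
# who have logged in since 2018-03-01.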
def hiveRunHisData(runDay):
     os.system("""source /etc/profile; \
            /usr/lib/hive-current/bin/hive -e " \
            drop table if exists xxwuy_history_empirical_value; \
            create table xxwuy_history_empirical_value as \
            select a1.uid,a2.intimacy,a2.big_fans_cnt,a2.intimacy_max,a3.coin,coalesce(a2.intimacy,0)*0.7+coalesce(a3.coin,0)*4 his_empval,a4.point_count \
            from oss_bi_all_user_profile a1 \
            left join (select a2.creator_uid,sum(intimacy) intimacy,count(a1.uid) big_fans_cnt,max(intimacy) intimacy_max \
            from fans_all_big_fans_relation_total a1 \
            inner join oss_bi_all_room a2 on a1.room_id=a2.creator_uid \
            where a1.pt_day='{runDay}' and a2.pt_day='{runDay}' and a2.state=0 \
            group by a2.creator_uid) a2 on a1.uid=a2.creator_uid \
            left join (select uid,sum(coin) coin \
            from xxx_user_coin_record \
            where coin>=0 and substr(created_time,1,10)<='{runDay}' \
            group by uid) a3 on a1.uid=a3.uid \
            left join xxx_user_gift_stat a4 on a1.uid=a4.uid \
            where a1.pt_day='{runDay}' and a1.state=0 and a1.last_login_time>='2018-03-01 00:00:00'; \
            " """.format(runDay=runDay));


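# Daily experience value for runDay, assembled from five intermediate tables:
# capped watch time (big-fans vs. ordinary viewing), gift points, timeline posts and shares
# (at most 2 of each rewarded per week window), and one-off novice tasks; the combined result
# is written into the pt_day partition of xxwuy_all_empirical_value.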
def hiveRunData(runDay):
     os.system("""source /etc/profile; \
            /usr/lib/hive-current/bin/hive -e " \
            drop table if exists xxwuy_daily_empirical_value_view; \
            create table xxwuy_daily_empirical_value_view as \
            with tab_view as ( \
            select uid,roomid room_id,sum(view_time) view_time \
            from recommend_data_view \
            where pt_day='{runDay}' \
            group by uid,roomid), \
            tab_big_fans_relation as ( \
            select uid,room_id \
            from fans_all_big_fans_relation_total \
            where pt_day='{runDay}' and state=0 \
            group by uid,room_id) \
            select a1.uid,floor(a1.big_fans_view_time/10)*70 big_fans_view_empval,floor(a1.user_view_time/10)*60 user_view_empval \
            from (select a1.uid,sum(case when big_fans_view_time <= 100 then big_fans_view_time else 100 end) big_fans_view_time,(case when sum(user_view_time)<=120 then sum(user_view_time) else 120 end) user_view_time \
            from (select a1.uid,a1.room_id,sum(case when a2.uid is not null and a2.room_id is not null then a1.view_time else 0 end) big_fans_view_time,sum(case when a2.uid is null and a2.room_id is null then a1.view_time else 0 end) user_view_time \
            from tab_view a1 \
            left join tab_big_fans_relation a2 on a1.uid=a2.uid and a1.room_id=a2.room_id \
            group by a1.uid,a1.room_id) a1 \
            group by a1.uid) a1 \
            ; \

            drop table if exists xxwuy_daily_empirical_value_gift; \
            create table xxwuy_daily_empirical_value_gift as \
            with tab_gift as ( \
            select uid,room_id,sum(gift_point) gift_point \
            from honeycomb_all_gift_record \
            where pt_day = '{runDay}' \
            group by uid,room_id), \
            tab_big_fans_relation as ( \
            select uid,room_id \
            from fans_all_big_fans_relation_total \
            where pt_day='{runDay}' and state=0 \
            group by uid,room_id) \
            select a1.uid,a1.big_fans_gift_point*0.7 big_fans_gift_empval,a1.user_gift_point*0.6 user_gift_empval \
            from (select a1.uid,sum(case when a2.uid is not null and a2.room_id is not null then a1.gift_point else 0 end) big_fans_gift_point,sum(case when a2.uid is null and a2.room_id is null then a1.gift_point else 0 end) user_gift_point \
            from tab_gift a1 \
            left join tab_big_fans_relation a2 on a1.uid=a2.uid and a1.room_id=a2.room_id \
            group by a1.uid) a1 \
            ; \

            drop table if exists xxwuy_daily_empirical_value_timeline; \
            create table xxwuy_daily_empirical_value_timeline as \
            select a1.uid,a1.timeline_cnt curr_timeline_cnt,a2.timeline_cnt weekhalf_timeline_cnt,(case when coalesce(a2.timeline_cnt,0)>=2 then 0 else (case when a1.timeline_cnt+coalesce(a2.timeline_cnt,0)<=2 then a1.timeline_cnt else 2-coalesce(a2.timeline_cnt,0) end) end)*80 timeline_empval \
            from (select uid,count(*) timeline_cnt \
            from im_timeline_all_time_line \
            where pt_day='{runDay}' and state=0 \
            group by uid) a1 \
            left join (select uid,count(*) timeline_cnt \
            from im_timeline_all_time_line \
            where pt_day between date_sub('{runDay}',pmod(datediff('{runDay}', '2012-01-01'), 7)-1) and date_sub('{runDay}',1) and state=0 \
            group by uid) a2 on a1.uid=a2.uid; \

            drop table if exists xxwuy_daily_empirical_value_share; \
            create table xxwuy_daily_empirical_value_share as \
            select a1.uid,a1.share_cnt curr_share_cnt,a2.share_cnt weekhalf_share_cnt,(case when coalesce(a2.share_cnt,0)>=2 then 0 else (case when a1.share_cnt+coalesce(a2.share_cnt,0)<=2 then a1.share_cnt else 2-coalesce(a2.share_cnt,0) end) end)*80 share_empval \
            from (select uid,count(*) share_cnt \
            from oss_share_feedback \
            where pt_day='{runDay}' \
            group by uid) a1 \
            left join (select uid,count(*) share_cnt \
            from oss_share_feedback \
            where pt_day between date_sub('{runDay}',pmod(datediff('{runDay}', '2012-01-01'), 7)-1) and date_sub('{runDay}',1) \
            group by uid) a2 on a1.uid=a2.uid; \

            drop table if exists xxwuy_daily_empirical_value_novice; \
            create table xxwuy_daily_empirical_value_novice as \
            select uid,max(case when task_id=3 then 80 else 0 end) subscr_empval,max(case when task_id=12 then 120 else 0 end) mail_empval,max(case when task_id=14 then 240 else 0 end) live_empval,max(case when task_id=18 then 120 else 0 end) portrait_empval \
            from oss_user_task_done_list a1 \
            where pt_day='{runDay}' and type='1' and state in ('0','1') and task_id in (3,12,14,18) \
            group by uid; \

            alter table xxwuy_all_empirical_value drop if exists partition (pt_day='{runDay}'); \
            alter table xxwuy_all_empirical_value add partition (pt_day='{runDay}'); \
            insert overwrite table xxwuy_all_empirical_value partition (pt_day='{runDay}') \
            select a1.uid,coalesce(a1.his_empval,0), \
            coalesce(a2.big_fans_view_empval,0),coalesce(a2.user_view_empval,0), \
            coalesce(a3.big_fans_gift_empval,0),coalesce(a3.user_gift_empval,0), \
            coalesce(a4.timeline_empval,0), \
            coalesce(a5.share_empval,0), \
            coalesce(a6.subscr_empval,0),coalesce(a6.mail_empval,0),coalesce(a6.live_empval,0),coalesce(a6.portrait_empval,0), \
            coalesce(a2.big_fans_view_empval,0)+coalesce(a2.user_view_empval,0)+coalesce(a3.big_fans_gift_empval,0)+coalesce(a3.user_gift_empval,0)+coalesce(a4.timeline_empval,0)+coalesce(a5.share_empval,0)+coalesce(a6.subscr_empval,0)+coalesce(a6.mail_empval,0)+coalesce(a6.live_empval,0)+coalesce(a6.portrait_empval,0) today_add_empval \
            from xxwuy_history_empirical_value a1 \
            left join xxwuy_daily_empirical_value_view a2 on a1.uid=a2.uid \
            left join xxwuy_daily_empirical_value_gift a3 on a1.uid=a3.uid \
            left join xxwuy_daily_empirical_value_timeline a4 on a1.uid=a4.uid \
            left join xxwuy_daily_empirical_value_share a5 on a1.uid=a5.uid \
            left join xxwuy_daily_empirical_value_novice a6 on a1.uid=a6.uid \
            ;
            " """.format(runDay=runDay));


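# Aggregate all daily partitions per uid into xxwuy_all_empirical_value_sum, then map each
# user's total experience value to a grade via xxwuy_empval_grade_map (the smallest grade
# whose empval threshold is >= the user's total).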
def hiveRunSumData(runDay):
     os.system("""source /etc/profile; \
            /usr/lib/hive-current/bin/hive -e " \
            drop table if exists xxwuy_all_empirical_value_sum; \
            create table xxwuy_all_empirical_value_sum as \
            select a1.uid,max(a1.his_empval) his_empval,sum(a1.today_add_empval) add_empval,max(a1.his_empval)+sum(a1.today_add_empval) total_curr_empval, \
            sum(a1.big_fans_view_empval) big_fans_view_empval,sum(a1.user_view_empval) user_view_empval,sum(a1.big_fans_gift_empval) big_fans_gift_empval,sum(a1.user_gift_empval) user_gift_empval, \
            sum(a1.timeline_empval) timeline_empval,sum(a1.share_empval) share_empval, \
            sum(a1.subscr_empval) subscr_empval,sum(a1.mail_empval) mail_empval,sum(a1.live_empval) live_empval,sum(a1.portrait_empval) portrait_empval, \
            sum(a1.subscr_empval)+sum(a1.mail_empval)+sum(a1.live_empval)+sum(a1.portrait_empval) novice_empval \
            from xxwuy_all_empirical_value a1 \
            group by a1.uid; \
            
            drop table if exists xxwuy_all_empirical_value_grade; \
            create table xxwuy_all_empirical_value_grade as \
            select /*+ MAPJOIN(a2) */ \
            '{runDay}' pt_day,a1.uid,a1.total_curr_empval,min(a2.grade) grade \
            from xxwuy_all_empirical_value_sum a1 \
            join xxwuy_empval_grade_map a2 \
            where a1.total_curr_empval<=a2.empval \
            group by a1.uid,a1.total_curr_empval \
            ; \
            " """.format(runDay=runDay));


# data batch
# hiveRunHisData(runDay='2018-06-04')
yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
for ptDay in dateRange(beginDate=yesterday, endDate=yesterday):
    print(ptDay)
    hiveRunData(runDay=ptDay)

hiveRunSumData(runDay=yesterday)
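
For a historical backfill, the same dateRange helper can drive the daily job over a longer window before the summary step is rerun once. A minimal sketch, assuming xxwuy_history_empirical_value has already been built by hiveRunHisData; the dates below are placeholders only:
# Hypothetical backfill over an example window.
for ptDay in dateRange(beginDate='2018-06-01', endDate='2018-06-04'):
    hiveRunData(runDay=ptDay)
hiveRunSumData(runDay='2018-06-04')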
2. Partitioned table DDL and data-load pattern
Partitioned table creation:
drop table if exists xxwuy_all_empirical_value;
CREATE TABLE `xxwuy_all_empirical_value`(
  `uid` bigint, 
  `his_empval` double, 
  `big_fans_view_empval` bigint, 
  `user_view_empval` bigint, 
  `big_fans_gift_empval` double, 
  `user_gift_empval` double, 
  `timeline_empval` bigint, 
  `share_empval` bigint, 
  `subscr_empval` int, 
  `mail_empval` int, 
  `live_empval` int, 
  `portrait_empval` int, 
  `today_add_empval` double)
PARTITIONED BY ( 
  `pt_day` string);
Partition data load:
alter table xxwuy_all_empirical_value drop if exists partition (pt_day='{runDay}');
alter table xxwuy_all_empirical_value add partition (pt_day='{runDay}');
insert overwrite table xxwuy_all_empirical_value partition (pt_day='{runDay}')
select a1.uid,coalesce(a1.his_empval,0),
coalesce(a2.big_fans_view_empval,0),coalesce(a2.user_view_empval,0),
coalesce(a3.big_fans_gift_empval,0),coalesce(a3.user_gift_empval,0),
coalesce(a4.timeline_empval,0),
coalesce(a5.share_empval,0),
coalesce(a6.subscr_empval,0),coalesce(a6.mail_empval,0),coalesce(a6.live_empval,0),coalesce(a6.portrait_empval,0),
coalesce(a2.big_fans_view_empval,0)+coalesce(a2.user_view_empval,0)+coalesce(a3.big_fans_gift_empval,0)+coalesce(a3.user_gift_empval,0)+coalesce(a4.timeline_empval,0)+coalesce(a5.share_empval,0)+coalesce(a6.subscr_empval,0)+coalesce(a6.mail_empval,0)+coalesce(a6.live_empval,0)+coalesce(a6.portrait_empval,0) today_add_empval
from xxwuy_history_empirical_value a1
left join xxwuy_daily_empirical_value_view a2 on a1.uid=a2.uid
left join xxwuy_daily_empirical_value_gift a3 on a1.uid=a3.uid
left join xxwuy_daily_empirical_value_timeline a4 on a1.uid=a4.uid
left join xxwuy_daily_empirical_value_share a5 on a1.uid=a5.uid
left join xxwuy_daily_empirical_value_novice a6 on a1.uid=a6.uid
;
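
After each load, the new partition can be sanity-checked with standard Hive statements. A minimal sketch in the same os.system / hive -e style as the script above (hiveCheckPartition is a hypothetical helper, not part of the original job):
def hiveCheckPartition(runDay):
    # Hypothetical sanity check: list partitions and count rows in the freshly loaded one.
    os.system("""source /etc/profile; \
        /usr/lib/hive-current/bin/hive -e " \
        show partitions xxwuy_all_empirical_value; \
        select count(*) from xxwuy_all_empirical_value where pt_day='{runDay}'; \
        " """.format(runDay=runDay))
# e.g. hiveCheckPartition(runDay='2018-06-04')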
3. Additional notes
Export of historical experience values joined with their grade:
hive -e "select /*+ MAPJOIN(a2) */
a1.uid,a1.intimacy,a1.big_fans_cnt,a1.intimacy_max,a1.coin,a1.his_empval,a1.point_count,min(a2.grade) grade
from xxwuy_history_empirical_value a1
join xxwuy_empval_grade_map a2
where a1.his_empval<>0 and a1.his_empval<=a2.empval
group by a1.uid,a1.intimacy,a1.big_fans_cnt,a1.intimacy_max,a1.coin,a1.his_empval,a1.point_count
;">history_empirical_0604.txt
The final total experience value (historical plus newly added) is exported by writing SQL that joins xxwuy_all_empirical_value_sum with xxwuy_all_empirical_value_grade.
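A minimal sketch of such an export in the same style as the script (the helper name, the output file name, and the left join are assumptions rather than the original job's code):
def hiveExportFinalEmpval(runDay):
    # Hypothetical export: per-user totals from the sum table joined with their grade.
    os.system("""source /etc/profile; \
        /usr/lib/hive-current/bin/hive -e " \
        select a1.uid,a1.his_empval,a1.add_empval,a1.total_curr_empval,a2.grade \
        from xxwuy_all_empirical_value_sum a1 \
        left join xxwuy_all_empirical_value_grade a2 on a1.uid=a2.uid; \
        " > total_empirical_value_{runDay}.txt""".format(runDay=runDay))
# e.g. hiveExportFinalEmpval(runDay='2018-06-04')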

Reposted from blog.csdn.net/babyfish13/article/details/80589995