1. The Python script that is executed
/Users/nisj/PycharmProjects/BiDataProc/Demand/Cc0810/ccQuery_sum.py
# -*- coding=utf-8 -*-
import datetime
import time
import os
import warnings
import sys
import re

reload(sys)
sys.setdefaultencoding('utf8')
warnings.filterwarnings("ignore")

yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')


def getRoomIdAndFirstRecDates():
    # Fetch the rooms still waiting for an update (updated_time holds the '2099-12-30 23:59:59' sentinel).
    roomIds = os.popen("""source /etc/profile; \
        /usr/bin/mysql -hMysqlHost -P6605 -uMysqlUser -pMysqlPass -N -e "select room_id,substr(first_rec_start_date,1,10) first_rec_start_date,substr(first_rec_end_date,1,10) first_rec_end_date,created_time \
        from jellyfish_hadoop_stat.invite_anchor_sum \
        where updated_time='2099-12-30 23:59:59'; \
        " """).readlines()

    roomId_list = []
    for roomIdList in roomIds:
        roomId = re.split('\t', roomIdList.replace('\n', ''))
        roomId_list.append(roomId)
    return roomId_list


def getFansCntUpdate2Mysql():
    if time.strftime('%H', time.localtime(time.time())) == '10':
        # Temporary table preprocessing (runs only during the 10 o'clock hour, i.e. once per day)
        os.system("""source /etc/profile; \
            /usr/lib/hive-current/bin/hive -e " \
            drop table if exists xxxxx_room_subscriber_dislodgebat; \
            create table xxxxx_room_subscriber_dislodgebat as \
            select a2.room_id,a2.uid,a2.state,a2.created_time \
            from (select uid,created_time \
            from oss_room_subscriber_roomid \
            where pt_day='{yesterday}' \
            group by uid,created_time \
            having count(*)=1) a1 \
            inner join oss_room_subscriber_roomid a2 on a1.uid=a2.uid and a1.created_time=a2.created_time \
            where a2.pt_day='{yesterday}' \
            ; \
            " """.format(yesterday=(datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')))

    if getRoomIdAndFirstRecDates():
        # Calculation of the self-brought fan data for each pending room
        for roomId, first_rec_start_date, first_rec_end_date, created_time in getRoomIdAndFirstRecDates():
            yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
            curr_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            fans_byself_cnts = os.popen("""source /etc/profile; \
                /usr/lib/hive-current/bin/hive -e " \
                add jar /home/hadoop/nisj/udf-jar/hadoop_udf_radixChange.jar; \
                create temporary function RadixChange as 'com.kascend.hadoop.RadixChange'; \
                with tab_user_frist_subscriber as (select room_id,uid view_uid,state,substr(created_time,1,10) subscriber_date \
                from (select room_id,uid,state,created_time,row_number()over(partition by uid order by created_time) rk from xxxxx_room_subscriber_dislodgebat) x \
                where rk=1 and room_id={roomId}), \
                tab_pay_info as (select uid,sum(amount) amount \
                from data_chushou_pay_info \
                where state=0 and pt_day between '{first_rec_start_date}' and '{first_rec_end_date}' \
                group by uid), \
                tab_access_log as( \
                select distinct RadixChange(lower(uid),16,10) uid \
                from bi_all_access_log \
                where pt_day between '{first_rec_start_date}' and '{first_rec_end_date}' \
                ) select a1.room_id,'{first_rec_start_date}' first_rec_start_date,'{first_rec_end_date}' first_rec_end_date,count(distinct a1.view_uid) fans_byself_cnt,sum(amount) rang_pay_amount,count(distinct a3.uid) last7day_remain \
                from tab_user_frist_subscriber a1 \
                left join tab_pay_info a2 on a1.view_uid=a2.uid \
                left join tab_access_log a3 on a1.view_uid=a3.uid \
                where a1.subscriber_date between '{first_rec_start_date}' and '{first_rec_end_date}' \
                group by a1.room_id; \
                " """.format(first_rec_start_date=first_rec_start_date, first_rec_end_date=first_rec_end_date, yesterday=yesterday, roomId=roomId)).readlines()

            fans_byself_cnt_list = []
            for fans_byself_cntList in fans_byself_cnts:
                fans_byself_cnt = re.split('\t', fans_byself_cntList.replace('\n', ''))
                fans_byself_cnt_list.append(fans_byself_cnt)

            # Write the computed metrics back to MySQL, one update per room
            for fans_byself_cnt in fans_byself_cnt_list:
                roomId = fans_byself_cnt[0]
                fans_byself_cnt_val = fans_byself_cnt[3]
                rang_pay_amount = fans_byself_cnt[4]
                last7day_remain = fans_byself_cnt[5]
                os.system("""source /etc/profile; \
                    /usr/bin/mysql -hMysqlHost -P6605 -uMysqlUser -pMysqlPass -e "update jellyfish_hadoop_stat.invite_anchor_sum \
                    set fans_count={fans_byself_cnt}, \
                    amount={rang_pay_amount}, \
                    preserve={last7day_remain}, \
                    updated_time='{curr_time}' \
                    where room_id={roomId} and created_time='{created_time}'; \
                    " """.format(roomId=roomId, created_time=created_time, fans_byself_cnt=fans_byself_cnt_val, rang_pay_amount=rang_pay_amount, last7day_remain=last7day_remain, curr_time=curr_time))


# Batch Test
getFansCntUpdate2Mysql()

Production path:
/home/hadoop/nisj/automationDemand/cc/ccQuery_sum.py
2. Crontab example
[hadoop@emr-worker-9 cc]$ crontab -l
0 */1 * * * flock -xn /tmp/my.lock -c '/usr/bin/python /home/hadoop/nisj/automationDemand/cc/ccQuery_sum.py >> /home/hadoop/nisj/automationDemand/cc/ccQuery_sum.log 2>&1'
3. Notes
The script is scheduled to run at the top of every hour. To prevent a new run from starting while the previous run within the hour has not yet finished, the crontab entry acquires a lock via flock. Part of the Python script only needs to run once per day: the check 【if time.strftime('%H', time.localtime(time.time())) == '10':】 ensures that part executes only during the 10 o'clock run. In addition, 【if getRoomIdAndFirstRecDates():】 decides whether the main self-brought-fans computation runs at all.
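For reference, the two guards described above can be shown in isolation. The sketch below is a hypothetical stand-alone illustration, not part of ccQuery_sum.py; get_pending_rooms() is a placeholder standing in for getRoomIdAndFirstRecDates():

# Stand-alone sketch of the hourly-job guards (hypothetical illustration;
# get_pending_rooms is a placeholder for getRoomIdAndFirstRecDates()).
import time

def get_pending_rooms():
    # Placeholder: the real function queries jellyfish_hadoop_stat.invite_anchor_sum
    # and returns one [room_id, start_date, end_date, created_time] row per room
    # whose updated_time is still the '2099-12-30 23:59:59' sentinel.
    return []

def run_hourly_job():
    # Daily branch: fires only during the 10 o'clock hour (local time), so the
    # temporary table is rebuilt once per day even though cron runs the script hourly.
    if time.strftime('%H', time.localtime(time.time())) == '10':
        pass  # rebuild xxxxx_room_subscriber_dislodgebat here

    # Main branch: runs only when at least one room is still pending.
    pending = get_pending_rooms()
    if pending:
        for room_id, start_date, end_date, created_time in pending:
            pass  # compute fan counts and update invite_anchor_sum here

if __name__ == '__main__':
    run_hourly_job()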