Python脚本定时选择性不重复执行示例

1、执行的Python脚本
/Users/nisj/PycharmProjects/BiDataProc/Demand/Cc0810/ccQuery_sum.py
# -*- coding=utf-8 -*-
import datetime
import time
import os
import warnings
import sys
import re
# Python 2 only: re-expose setdefaultencoding and switch the interpreter's
# default encoding to UTF-8 so non-ASCII text in the shell command strings
# below does not raise UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf8')

# Suppress all warnings (this is an unattended cron job; warnings would only
# pollute the log file).
warnings.filterwarnings("ignore")

# Yesterday's calendar date as 'YYYY-MM-DD'; used as the Hive pt_day partition key.
yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

def getRoomIdAndFirstRecDates():
    """Fetch the anchor rows still pending computation from MySQL.

    A row is "pending" while its updated_time holds the sentinel value
    '2099-12-30 23:59:59'; the update step in getFansCntUpdate2Mysql()
    overwrites that timestamp, which removes the row from this result.

    Returns:
        list of [room_id, first_rec_start_date, first_rec_end_date,
        created_time] string lists (dates truncated to 'YYYY-MM-DD');
        an empty list when nothing is pending.
    """
    # mysql -N suppresses the header row; output is one row per line with
    # tab-separated fields.
    rows = os.popen("""source /etc/profile; \
                /usr/bin/mysql  -hMysqlHost -P6605 -uMysqlUser -pMysqlPass -N -e "select room_id,substr(first_rec_start_date,1,10) first_rec_start_date,substr(first_rec_end_date,1,10) first_rec_end_date,created_time \
                from jellyfish_hadoop_stat.invite_anchor_sum  \
                where updated_time='2099-12-30 23:59:59'; \
                " """).readlines()
    # A literal tab needs no regex: str.split is simpler and faster than re.split.
    return [row.replace('\n', '').split('\t') for row in rows]

def getFansCntUpdate2Mysql():
    """Hourly cron entry point.

    Once per day (only while the local hour is 10) rebuilds a Hive temp
    table of "clean" subscriptions, then, for every pending room returned
    by getRoomIdAndFirstRecDates(), computes fan count / pay amount /
    7-day retention over that room's recommendation window in Hive and
    writes the results back to MySQL. Writing back stamps updated_time,
    which removes the row from the pending set, so each room is processed
    at most once.
    """
    # Daily gate: cron fires every hour, but this branch must run once per day.
    if time.strftime('%H', time.localtime(time.time())) == '10':
        # Temp-table preparation: keep only subscriptions whose
        # (uid, created_time) pair occurs exactly once in yesterday's
        # partition, i.e. drop suspected batch/duplicate registrations.
        os.system("""source /etc/profile; \
            /usr/lib/hive-current/bin/hive -e " \
            drop table if exists xxxxx_room_subscriber_dislodgebat; \
            create table xxxxx_room_subscriber_dislodgebat as \
            select a2.room_id,a2.uid,a2.state,a2.created_time \
            from (select uid,created_time \
            from oss_room_subscriber_roomid  \
            where pt_day='{yesterday}' \
            group by uid,created_time \
            having count(*)=1) a1 \
            inner join oss_room_subscriber_roomid a2 on a1.uid=a2.uid and a1.created_time=a2.created_time \
            where a2.pt_day='{yesterday}' \
            ; \
            " """.format(yesterday=(datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')));

    # Only run the main computation when there are pending rooms.
    if getRoomIdAndFirstRecDates():
        # Self-brought fan computation, one Hive query per pending room.
        # NOTE(review): getRoomIdAndFirstRecDates() is called a second time
        # here; the pending set could change between the two calls.
        for roomId, first_rec_start_date, first_rec_end_date, created_time in getRoomIdAndFirstRecDates():
            # NOTE(review): shadows the module-level `yesterday`; it is passed
            # to .format() below but the template contains no {yesterday}
            # placeholder, so the argument is effectively unused.
            yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
            curr_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
            # For this room: earliest subscription per uid (rk=1), joined with
            # pay totals and distinct access-log uids within the recommendation
            # window; RadixChange converts the hex uid in the access log to decimal.
            fans_byself_cnts=os.popen("""source /etc/profile; \
                        /usr/lib/hive-current/bin/hive -e " \
                        add jar /home/hadoop/nisj/udf-jar/hadoop_udf_radixChange.jar; \
                        create temporary function RadixChange as 'com.kascend.hadoop.RadixChange'; \
                        with tab_user_frist_subscriber as (select room_id,uid view_uid,state,substr(created_time,1,10) subscriber_date \
                        from (select room_id,uid,state,created_time,row_number()over(partition by uid order by created_time) rk from xxxxx_room_subscriber_dislodgebat) x \
                        where rk=1 and room_id={roomId}), \
                        tab_pay_info as (select uid,sum(amount) amount \
                        from data_chushou_pay_info  \
                        where state=0 and pt_day between '{first_rec_start_date}' and '{first_rec_end_date}' \
                        group by uid), \
                        tab_access_log as(  \
                        select distinct RadixChange(lower(uid),16,10) uid  \
                        from bi_all_access_log  \
                        where pt_day between '{first_rec_start_date}' and '{first_rec_end_date}'  \
                        )
                        select a1.room_id,'{first_rec_start_date}' first_rec_start_date,'{first_rec_end_date}' first_rec_end_date,count(distinct a1.view_uid) fans_byself_cnt,sum(amount) rang_pay_amount,count(distinct a3.uid) last7day_remain \
                        from tab_user_frist_subscriber a1 \
                        left join tab_pay_info a2 on a1.view_uid=a2.uid \
                        left join tab_access_log a3 on a1.view_uid=a3.uid \
                        where a1.subscriber_date between '{first_rec_start_date}' and '{first_rec_end_date}' \
                        group by a1.room_id; \
                        " """.format(first_rec_start_date=first_rec_start_date, first_rec_end_date=first_rec_end_date, yesterday=yesterday, roomId=roomId)).readlines();

            # Parse Hive's tab-separated stdout into field lists (normally
            # zero or one data row thanks to the room_id filter above).
            fans_byself_cnt_list = []
            for fans_byself_cntList in fans_byself_cnts:
                fans_byself_cnt = re.split('\t', fans_byself_cntList.replace('\n', ''))
                fans_byself_cnt_list.append(fans_byself_cnt)
            # Write each result row back to MySQL; setting updated_time to the
            # current timestamp marks the room as processed.
            # NOTE(review): values are interpolated straight into the SQL text —
            # acceptable only because they originate from our own databases.
            for fans_byself_cnt in fans_byself_cnt_list:
                roomId=fans_byself_cnt[0]
                fans_byself_cnt_val=fans_byself_cnt[3]
                rang_pay_amount=fans_byself_cnt[4]
                last7day_remain=fans_byself_cnt[5]
                os.system("""source /etc/profile; \
                            /usr/bin/mysql  -hMysqlHost -P6605 -uMysqlUser -pMysqlPass -e "update jellyfish_hadoop_stat.invite_anchor_sum \
                            set fans_count={fans_byself_cnt}, \
                                amount={rang_pay_amount}, \
                                preserve={last7day_remain}, \
                                updated_time='{curr_time}' \
                            where room_id={roomId} and created_time='{created_time}'; \
                            " """.format(roomId=roomId, created_time=created_time, fans_byself_cnt=fans_byself_cnt_val, rang_pay_amount=rang_pay_amount, last7day_remain=last7day_remain, curr_time=curr_time));
# Batch Test — runs unconditionally at import time.
# NOTE(review): consider an `if __name__ == '__main__':` guard if this module
# is ever imported rather than executed directly by cron.
getFansCntUpdate2Mysql()
生产路径:
/home/hadoop/nisj/automationDemand/cc/ccQuery_sum.py
2、Crontab示例
[hadoop@emr-worker-9 cc]$ crontab -l
0 */1 * * * flock -xn /tmp/my.lock -c '/usr/bin/python /home/hadoop/nisj/automationDemand/cc/ccQuery_sum.py >> /home/hadoop/nisj/automationDemand/cc/ccQuery_sum.log 2>&1'
3、一些说明
脚本通过定时每个整点执行;为防止一小时内一次执行未执行完,进行了crontab加锁操作;python脚本的一部分代码每天执行一次,通过【if time.strftime('%H', time.localtime(time.time())) == '10':】来确保每天的10点执行一次;另外,根据【if getRoomIdAndFirstRecDates():】来进行自带粉丝数主体程序是否执行的判断。

猜你喜欢

转载自blog.csdn.net/babyfish13/article/details/79539697