Python进行Hive处理后数据分别插入到不同的mysql表

数据插入及处理的方式其实并没有什么新花样,主要是hive处理后的数据表,相同的表,不同的字段插入到两个不同的mysql表中。
/Users/nisj/PycharmProjects/BiDataProc/love/HiveDataSum2Mysql-0710forZw.py
# -*- coding=utf-8 -*-
import os
import re
import time
import datetime
import warnings
import sys
reload(sys)
sys.setdefaultencoding('utf8')

warnings.filterwarnings("ignore")

srcMysqlConfig_jellyfish_hadoop_stat = {
    'host': 'MysqlHost',
    # 'host': 'MysqlHost',
    'user': 'MysqlUser',
    'passwd': 'MysqlPass',
    'port': 6605,
    'db': 'jellyfish_hadoop_stat'
}


def dateRange(beginDate, endDate):
    dates = []
    dt = datetime.datetime.strptime(endDate, "%Y-%m-%d")
    date = endDate[:]
    while date >= beginDate:
        dates.append(date)
        dt = dt - datetime.timedelta(1)
        date = dt.strftime("%Y-%m-%d")
    return dates


def dataProcPre(runDay):
    os.system("""source /etc/profile; \
                /usr/lib/hive-current/bin/hive -e " \
                drop table if exists xxxhuangxt_user_room_info; \
                create table xxxhuangxt_user_room_info as \
                select a1.nickname,a2.room_id,a1.uid \
                from oss_bi_all_user_profile a1 \
                left join (select a1.room_id,a1.creator_uid \
                from (select a1.id room_id,a1.creator_uid,row_number()over(partition by a1.creator_uid order by a1.updated_time desc) rn \
                from oss_bi_all_room a1 \
                where a1.pt_day='{runDay}' and a1.state=0) a1 \
                where a1.rn=1) a2 on a1.uid=a2.creator_uid \
                where a1.pt_day='{runDay}'; \
                
                drop table if exists xxxhuangxt_bigfans_live; \
                create table xxxhuangxt_bigfans_live as \
                with tab_big_fans_relation as ( \
                select a1.room_id,a1.uid,a1.intimacy,case when a1.intimacy between 0 and 40 then 1 \
                     when a1.intimacy between 41 and 408-1 then 2 \
                     when a1.intimacy between 408 and 1020-1 then 3 \
                     when a1.intimacy between 1020 and 3061-1 then 4 \
                     when a1.intimacy between 3061 and 6122-1 then 5 \
                     when a1.intimacy between 6122 and 10204-1 then 6 \
                     when a1.intimacy between 10204 and 18367-1 then 7 \
                     when a1.intimacy between 18367 and 30612-1 then 8 \
                     when a1.intimacy between 30612 and 44898-1 then 9 \
                     when a1.intimacy between 44898 and 61224-1 then 10 \
                     when a1.intimacy between 61224 and 102041-1 then 11 \
                     when a1.intimacy between 102041 and 163265-1 then 12 \
                     when a1.intimacy between 163265 and 244898-1 then 13 \
                     when a1.intimacy between 244898 and 367347-1 then 14 \
                     when a1.intimacy between 367347 and 510204-1 then 15 \
                     when a1.intimacy between 510204 and 714286-1 then 16 \
                     when a1.intimacy between 714286 and 979592-1 then 17 \
                     when a1.intimacy between 979592 and 1326531-1 then 18 \
                     when a1.intimacy between 1326531 and 1734694-1 then 19 \
                     when a1.intimacy between 1734694 and 2244898-1 then 20 \
                     when a1.intimacy between 2244898 and 3265306-1 then 21 \
                     when a1.intimacy between 3265306 and 4693878-1 then 22 \
                     when a1.intimacy between 4693878 and 6530612-1 then 23 \
                     when a1.intimacy between 6530612 and 9183673-1 then 24 \
                     when a1.intimacy between 9183673 and 12244898-1 then 25 \
                     when a1.intimacy between 12244898 and 16326531-1 then 26 \
                     when a1.intimacy between 16326531 and 20408163-1 then 27 \
                     when a1.intimacy between 20408163 and 24489796-1 then 28 \
                     when a1.intimacy between 24489796 and 28571429-1 then 29 \
                     when a1.intimacy between 28571429 and 32653061-1 then 30 \
                     when a1.intimacy >= 32653061 then 30 \
                end bigfans_level,a1.state,row_number()over(partition by a1.uid order by a1.intimacy desc) user_grade_rn \
                from fans_all_big_fans_relation_total a1 \
                where a1.pt_day='{runDay}' and a1.state=-1), \
                tab_live_room as( \
                select a1.room_id \
                from honeycomb_all_live_history_status a1 \
                inner join oss_bi_all_room a2 on a1.room_id=a2.id \
                where a1.pt_day between date_sub('{runDay}',30) and '{runDay}' and a2.pt_day='{runDay}' and a2.state=0 \
                group by a1.room_id), \
                tab_phone_num as ( \
                select uid,phone_num \
                from oss_bi_all_user_phone_num \
                where pt_day='{runDay}' and state=0) \
                select a1.uid,a1.bigfans_level,a1.intimacy,a3.phone_num,a1.room_id \
                from tab_big_fans_relation a1 \
                inner join tab_live_room a2 on a1.room_id=a2.room_id \
                left join tab_phone_num a3 on a1.uid=a3.uid \
                where a1.user_grade_rn=1; \
                
                drop table if exists xxxhuangxt_bigfans_view; \
                create table xxxhuangxt_bigfans_view as \
                select a1.uid,a1.roomid room_id,sum(a1.view_time) view_time \
                from recommend_data_view a1 \
                inner join xxxhuangxt_bigfans_live a2 on a1.uid=a2.uid and a1.roomid=a2.room_id \
                where a1.pt_day between '2018-01-01' and '2018-07-06' \
                group by a1.uid,a1.roomid; \
                
                drop table if exists xxxhuangxt_bigfans_subscriber; \
                create table xxxhuangxt_bigfans_subscriber as \
                select a1.uid,a1.room_id,min(a1.created_time) subscriber_time \
                from oss_bi_all_room_subscriber_roomid a1 \
                inner join xxxhuangxt_bigfans_live a2 on a1.uid=a2.uid and a1.room_id=a2.room_id \
                where a1.pt_day='{runDay}' \
                group by a1.uid,a1.room_id;" """.format(runDay=runDay))


def hiveDataSum2Mysql_jiazw(runDay):
    # 参数初始化赋值
    host = srcMysqlConfig_jellyfish_hadoop_stat['host']
    port = srcMysqlConfig_jellyfish_hadoop_stat['port']
    user = srcMysqlConfig_jellyfish_hadoop_stat['user']
    passwd = srcMysqlConfig_jellyfish_hadoop_stat['passwd']
    db = srcMysqlConfig_jellyfish_hadoop_stat['db']

    os.system("""source /etc/profile; \
             /usr/bin/mysql -h{host} -P{port} -u{user} -p{passwd} -e "set names utf8;use {db}; \
             delete from bigfans_recall_record where \`date\`='{runDay}'; \
             delete from bigfans_recall_info where \`date\`='{runDay}'; " """.format(host=host, port=port, user=user,
                                                                                     passwd=passwd, db=db,
                                                                                     runDay=runDay))

    Sum_Data = os.popen("""/usr/lib/hive-current/bin/hive -e " \
            select '{runDay}' data_date,a1.uid,a1.bigfans_level,a1.intimacy,a1.phone_num,a1.room_id,a2.nickname fans_nickname,a3.nickname anchor_nickname,a4.view_time,a5.subscriber_time, \
            -1 state,from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss') created_time,from_unixtime(unix_timestamp(),'yyyy-MM-dd HH:mm:ss') updated_time \
            from xxxhuangxt_bigfans_live a1 \
            left join xxxhuangxt_user_room_info a2 on a1.uid=a2.uid \
            left join xxxhuangxt_user_room_info a3 on a1.room_id=a3.room_id \
            left join xxxhuangxt_bigfans_view a4 on a1.uid=a4.uid and a1.room_id=a4.room_id \
            left join xxxhuangxt_bigfans_subscriber a5 on a1.uid=a5.uid and a1.room_id=a5.room_id; \
            " """.format(runDay=runDay)).readlines();

    Sum_Data_list = []
    for sum_list in Sum_Data:
        sum = re.split('\t', sum_list.replace('\n', ''))
        Sum_Data_list.append(sum)

    i = 0
    insert_mysql_sql = """/usr/bin/mysql -h{host} -P{port} -u{user} -p{passwd} -e "set names utf8;use {db}; \
           insert into bigfans_recall_record(\`date\`,uid,phone_num,room_id,state,created_time,updated_time,nickname,anchorName) \
           values """.format(host=host, port=port, user=user, passwd=passwd, db=db)
    insert_mysql_sql_recallinfo = """/usr/bin/mysql -h{host} -P{port} -u{user} -p{passwd} -e "set names utf8;use {db}; \
               insert into bigfans_recall_info(uid,level,room_id,duration,follow_time,intimacy,state,\`date\`,created_time,updated_time) \
               values """.format(host=host, port=port, user=user, passwd=passwd, db=db)
    for sumd in Sum_Data_list:
        data_date = sumd[0]
        uid = sumd[1]
        bigfans_level = sumd[2]
        intimacy = sumd[3]
        phone_num = sumd[4]
        room_id = sumd[5]
        fans_nickname = sumd[6].replace(chr(10), '').replace(chr(39), '').replace('(', '').replace(')', '').replace(
            chr(96), '').replace(chr(34), '').replace(chr(92), '')
        anchor_nickname = sumd[7].replace(chr(10), '').replace(chr(39), '').replace('(', '').replace(')', '').replace(
            chr(96), '').replace(chr(34), '').replace(chr(92), '')
        view_time = sumd[8]
        subscriber_time = sumd[9]
        state = sumd[10]
        etl_time = time.strftime('%Y-%m-%d %X', time.localtime())

        i += 1
        insert_mysql_sql = insert_mysql_sql + """('{data_date}','{uid}','{phone_num}','{room_id}','{state}','{etl_time}','{etl_time}','{fans_nickname}','{anchor_nickname}'),""".format(
            host=host, port=port, user=user, passwd=passwd, db=db,
            data_date=data_date, uid=uid, phone_num=phone_num, room_id=room_id, state=state, etl_time=etl_time,
            fans_nickname=fans_nickname, anchor_nickname=anchor_nickname)

        insert_mysql_sql_recallinfo = insert_mysql_sql_recallinfo + """('{uid}','{bigfans_level}','{room_id}','{view_time}','{subscriber_time}','{intimacy}','{state}','{data_date}','{etl_time}','{etl_time}'),""".format(
            host=host, port=port, user=user, passwd=passwd, db=db,
            uid=uid, bigfans_level=bigfans_level, room_id=room_id,
            view_time=view_time, subscriber_time=subscriber_time, intimacy=intimacy, state=state, data_date=data_date,
            etl_time=etl_time)

        if (i % 600 == 0):
            insert_mysql_sql = insert_mysql_sql.rstrip(',') + """ ;" """
            insert_mysql_sql_recallinfo = insert_mysql_sql_recallinfo.rstrip(',') + """ ;" """
            os.system(insert_mysql_sql)
            os.system(insert_mysql_sql_recallinfo)

            insert_mysql_sql = """/usr/bin/mysql -h{host} -P{port} -u{user} -p{passwd} -e "set names utf8;use {db}; \
                   insert into bigfans_recall_record(\`date\`,uid,phone_num,room_id,state,created_time,updated_time,nickname,anchorName) \
                   values """.format(host=host, port=port, user=user, passwd=passwd, db=db)
            insert_mysql_sql_recallinfo = """/usr/bin/mysql -h{host} -P{port} -u{user} -p{passwd} -e "set names utf8;use {db}; \
                   insert into bigfans_recall_info(uid,level,room_id,duration,follow_time,intimacy,state,\`date\`,created_time,updated_time) \
                   values """.format(host=host, port=port, user=user, passwd=passwd, db=db)

    insert_mysql_sql = insert_mysql_sql.rstrip(',') + """ ;" """
    insert_mysql_sql_recallinfo = insert_mysql_sql_recallinfo.rstrip(',') + """ ;" """
    os.system(insert_mysql_sql)
    os.system(insert_mysql_sql_recallinfo)


# run serial Batch
runDay = '2018-07-09'
# dataProcPre(runDay=runDay)
hiveDataSum2Mysql_jiazw(runDay=runDay)

猜你喜欢

转载自blog.csdn.net/babyfish13/article/details/81004123
今日推荐