DataX数据同步

安装配置

wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
tar -xvf datax.tar.gz
vi ~/.bash_profile
# 在文件末尾添加下面这一行(假设 datax.tar.gz 解压在 /opt/app 目录下):
export DATAX_HOME=/opt/app/datax
# 保存退出后,使配置生效:
source ~/.bash_profile

同步脚本

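在$DATAX_HOME/job目录下,编写同步脚本。文件名需与定时脚本中使用的表名前缀一致(例如 tb_scene_site_request_event_log.json);脚本中的 $table 变量由启动命令的 -p "-Dtable=表名" 参数传入: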

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": ["id","app_application_type","city_id","user_id","site_id","app_id","app_version","sequence","device_id","device_type","carrier","os_version","brand","devicemodel","bundle_id","geo","remote_ip","name","request_header","parameter","response","status","create_user","create_time","update_user","update_time","request_time"], 
                        "connection": [
                            {
                                "table": ["$table"],
                                "jdbcUrl": ["jdbc:mysql://10.10.4.202/scene?useSSL=false&useUnicode=true&characterEncoding=UTF-8"]
                            }
                        ], 
                        "username": "root",
                        "password": "123456", 
                        "splitPk": "id"
                    }
                }, 
                "writer": {
                    "name": "mysqlwriter", 
                    "parameter": {
                        "column": ["id","app_application_type","city_id","user_id","site_id","app_id","app_version","sequence","device_id","device_type","carrier","os_version","brand","devicemodel","bundle_id","geo","remote_ip","name","request_header","parameter","response","status","create_user","create_time","update_user","update_time","request_time"], 
                        "writeMode": "replace",
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://10.10.4.202/scene?useSSL=false&useUnicode=true&characterEncoding=UTF-8", 
                                "table": ["tb_scene_site_request_event_log"]
                            }
                        ],
                        "username": "root",
                        "password": "123456" 
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                 "byte": 1048576,
                 "channel": 5
            } 
        }
    }
}

脚本配置说明:
https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md
https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md

Python定时任务

1、确保已经安装Python3,如果没有按如下命令安装:

wget https://www.python.org/ftp/python/3.6.5/Python-3.6.5.tar.xz
tar -xvJf Python-3.6.5.tar.xz
cd Python-3.6.5

yum -y install zlib zlib-devel openssl-devel
mkdir /usr/local/python3
./configure --prefix=/usr/local/python3 --with-ssl
make && make install

ln -s /usr/local/python3/bin/python3 /usr/bin/python3
ln -s /usr/local/python3/bin/pip3 /usr/bin/pip3

验证是否安装成功:python3 -V

2、安装PyMySQL和apscheduler

pip3 install PyMySQL
pip3 install apscheduler

3、定时脚本(保存为 scene_site_syncer.py;按 cron 规则 hour='2' 定时触发,同步前一天日期对应的数据表)

#!/usr/bin/python3
# -*- coding: UTF-8 -*-

from apscheduler.schedulers.blocking import BlockingScheduler
import os
import datetime
import pymysql
import threading

def getLastDate():
    """Return yesterday's local date formatted as a ``YYYYMMDD`` string."""
    one_day = datetime.timedelta(days=1)
    yesterday = datetime.datetime.now() - one_day
    return yesterday.strftime('%Y%m%d')

# Synchronization thread
class syncerThread(threading.Thread):
    """Thread that runs ``ad_syncer`` with the thread's own name as argument."""

    def __init__(self, threadID, name):
        """Store the caller-supplied id and use ``name`` as the thread name."""
        super().__init__()
        self.threadID = threadID
        # The thread name doubles as the argument handed to ad_syncer in run().
        self.name = name

    def run(self):
        """Thread body: call ``ad_syncer`` with this thread's name."""
        ad_syncer(self.name)

# Data synchronization
def ad_syncer(name):
    """Sync yesterday's dated table ``name + YYYYMMDD`` through DataX.

    Looks up the dated table on the MySQL server and, when it exists, runs
    the DataX job ``$DATAX_HOME/job/<name>.json`` with the table name passed
    as the ``table`` job parameter. Progress is reported with ``print``.

    Args:
        name: Table-name prefix; also the base name of the DataX job file.
    """
    import subprocess  # local import keeps this block self-contained

    # Compute the date once so the log line and the table name always agree,
    # even when the job happens to run across midnight.
    last_date = getLastDate()
    print('同步日期:{0}'.format(last_date))
    table = name + last_date

    # Keyword arguments: PyMySQL 1.0+ rejects positional connect() arguments.
    db = pymysql.connect(host="10.10.4.202", user="root",
                         password="123456", database="scene")
    try:
        with db.cursor() as cursor:
            # Let the driver quote the pattern instead of concatenating SQL.
            # NOTE(review): '_' is still a LIKE wildcard here, exactly as in
            # the original query.
            cursor.execute("SHOW TABLES LIKE %s", (table,))
            table_exists = cursor.fetchone() is not None
    finally:
        # Always release the connection, and do so before the DataX run so
        # it is not held open while the external job is working.
        db.close()

    if not table_exists:
        print('{0}不存在'.format(table))
        return

    datax_home = os.environ.get('DATAX_HOME')
    if not datax_home:
        print('未设置环境变量DATAX_HOME,无法同步{0}'.format(table))
        return

    print("开始同步{0}".format(table))
    # Same argv the original shell string produced, but without a shell.
    command = [
        'python', os.path.join(datax_home, 'bin', 'datax.py'),
        '--jvm=-Xms2G -Xmx2G',
        '-p', '-Dtable=' + table,
        os.path.join(datax_home, 'job', name + '.json'),
    ]
    exit_code = subprocess.call(command)
    # Only report success when DataX actually exited cleanly.
    if exit_code == 0:
        print("同步{0}完成".format(table))
    else:
        print("同步{0}失败,退出码:{1}".format(table, exit_code))

# Start one synchronization pass
def start_syncer():
    """Run the sync worker thread and block until it has finished."""
    # Plan A request log
    worker = syncerThread(1, "tb_scene_site_request_event_log")
    worker.start()
    # Wait for the worker so the end message is printed after the sync.
    worker.join()
    print('同步结束')
   
if __name__ == '__main__':
    # Register start_syncer on a cron trigger (hour='2') and block in the
    # scheduler loop until interrupted.
    scheduler = BlockingScheduler()
    scheduler.add_job(start_syncer, trigger='cron', hour='2')
    exit_key = 'Break' if os.name == 'nt' else 'C'
    print('Press Ctrl+{0} to exit'.format(exit_key))
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()

4、相关命令
1)启动

nohup python3 scene_site_syncer.py &

2)停止(将下面的脚本保存为单独的文件后执行,唯一的命令行参数为要结束的任务编号,0代表停止所有)

#!/usr/bin/python
#coding=utf-8
import sys,os
def kill_crawler(id):
    """Kill running ``scene_site_syncer*`` processes with ``kill -9``.

    Scans the output of ``ps -ef | grep python`` and kills every process
    whose last command-line token starts with ``scene_site_syncer`` when its
    task id equals ``id`` or when ``id`` is ``'0'``. The outcome of each kill
    is reported with ``print``.

    Args:
        id: Task id to stop, as a string; ``'0'`` stops every matching process.
    """
    pipe = os.popen('ps -ef | grep python')
    try:
        lines = pipe.readlines()
    finally:
        # Release the pipe; the listing is fully read at this point.
        pipe.close()

    for line in lines:
        columns = line.split()
        # Need at least the PID column (index 1); skip anything shorter.
        if len(columns) < 2:
            continue
        pid, name = columns[1], columns[-1]
        if not name.startswith('scene_site_syncer'):
            continue
        # NOTE(review): for 'scene_site_syncer.py' this slice yields
        # 'e_syncer', so in practice only id '0' matches -- confirm the
        # intended task naming scheme before changing it.
        task_id = name[9:-3]
        if task_id != id and id != '0':
            continue
        rc = os.system("kill -9 %d" % int(pid))
        # print() with a single argument behaves the same on Python 2 and 3.
        if rc == 0:
            print("stop \"%s\" success!!" % name)
        else:
            print("stop \"%s\" failed!!" % name)

if __name__ == '__main__':
    # Exactly one argument is expected: the task id ('0' stops everything).
    if len(sys.argv) != 2:
        # print() with a single argument works on both Python 2 and 3.
        print(u'输入要结束的任务编号,0代表停止所有')
        sys.exit()
    kill_crawler(sys.argv[1])

猜你喜欢

转载自blog.csdn.net/m0_37261091/article/details/87900426