安装配置
# Download the DataX release tarball.
wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz
# Unpack it into the current directory.
# NOTE(review): DATAX_HOME below is /opt/app/datax, so this presumably has to
# be run from /opt/app -- confirm.
tar -xvf datax.tar.gz
# Open the login profile; presumably the export line below is what gets added
# to it.
vi ~/.bash_profile
export DATAX_HOME=/opt/app/datax
# Re-read the profile so DATAX_HOME is defined in the current shell.
source ~/.bash_profile
同步脚本
在$DATAX_HOME/job目录下,编写同步脚本:
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": ["id","app_application_type","city_id","user_id","site_id","app_id","app_version","sequence","device_id","device_type","carrier","os_version","brand","devicemodel","bundle_id","geo","remote_ip","name","request_header","parameter","response","status","create_user","create_time","update_user","update_time","request_time"],
"connection": [
{
"table": ["$table"],
"jdbcUrl": ["jdbc:mysql://10.10.4.202/scene?useSSL=false&useUnicode=true&characterEncoding=UTF-8"]
}
],
"username": "root",
"password": "123456",
"splitPk": "id"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": ["id","app_application_type","city_id","user_id","site_id","app_id","app_version","sequence","device_id","device_type","carrier","os_version","brand","devicemodel","bundle_id","geo","remote_ip","name","request_header","parameter","response","status","create_user","create_time","update_user","update_time","request_time"],
"writeMode": "replace",
"connection": [
{
"jdbcUrl": "jdbc:mysql://10.10.4.202/scene?useSSL=false&useUnicode=true&characterEncoding=UTF-8",
"table": ["tb_scene_site_request_event_log"]
}
],
"username": "root",
"password": "123456"
}
}
}
],
"setting": {
"speed": {
"byte": 1048576,
"channel": 5
}
}
}
}
脚本配置说明:
https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md
https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md
Python定时任务
1、确保已经安装Python3;如果尚未安装,请按如下命令安装:
# Build CPython 3.6.5 from source and install it under /usr/local/python3.
wget https://www.python.org/ftp/python/3.6.5/Python-3.6.5.tar.xz
tar -xvJf Python-3.6.5.tar.xz
# Development headers needed to build the zlib and ssl modules.
yum -y install zlib zlib-devel openssl-devel
# -p: do not fail when the prefix directory already exists.
mkdir -p /usr/local/python3
# configure and the Makefile live inside the extracted source tree, so enter
# it first; the && chain stops at the first failing step instead of running
# configure/make in the wrong directory.
# NOTE(review): --with-ssl may not be a recognized configure option for this
# Python version -- confirm against `./configure --help`.
cd Python-3.6.5 \
  && ./configure --prefix=/usr/local/python3 --with-ssl \
  && make \
  && make install
# Expose the new interpreter and pip on the default PATH.
ln -s /usr/local/python3/bin/python3 /usr/bin/python3
ln -s /usr/local/python3/bin/pip3 /usr/bin/pip3
验证是否安装成功:python3 -V
2、安装PyMySQL和apscheduler
# Install the two third-party packages imported by the scheduler script,
# one pip invocation per package.
for pkg in PyMySQL apscheduler; do
  pip3 install "$pkg"
done
3、定时脚本
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
from apscheduler.schedulers.blocking import BlockingScheduler
import os
import datetime
import pymysql
import threading
# Get yesterday's date
def getLastDate():
    """Return yesterday's local date formatted as ``YYYYMMDD``."""
    one_day = datetime.timedelta(days=1)
    yesterday = datetime.datetime.now() - one_day
    return yesterday.strftime('%Y%m%d')
# Sync thread
class syncerThread(threading.Thread):
    """Thread that runs ``ad_syncer`` for the table prefix passed as ``name``."""

    def __init__(self, threadID, name):
        super().__init__()
        # Caller-supplied identifier; stored on the instance only.
        self.threadID = threadID
        # Assigned after the base initializer so it replaces the default
        # thread name; run() passes it on to ad_syncer.
        self.name = name

    def run(self):
        ad_syncer(self.name)
# Data sync
def ad_syncer(name):
    """Sync yesterday's daily table ``<name><YYYYMMDD>`` through DataX if it exists.

    Checks the ``scene`` database for the table and, when present, launches
    ``$DATAX_HOME/bin/datax.py`` with ``-Dtable=<table>`` against the job file
    ``$DATAX_HOME/job/<name>.json``.

    Args:
        name: Table-name prefix; also the base name of the DataX job file.
    """
    import subprocess

    # Resolve the date once so the logged date and the table name cannot
    # disagree if the clock crosses midnight between two calls.
    last_date = getLastDate()
    print('同步日期:{0}'.format(last_date))
    table = name + last_date

    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional connect()
    # parameters.
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # the environment or a config file.
    db = pymysql.connect(host="10.10.4.202", user="root",
                         password="123456", database="scene")
    try:
        with db.cursor() as cursor:
            # Parameterized so the table name is escaped by the driver.
            cursor.execute("SHOW TABLES LIKE %s", (table,))
            data = cursor.fetchone()
    finally:
        # Always release the connection, and do it before the sync starts so
        # it is not held open for the whole DataX run.
        db.close()

    if data is None:
        print('{0}不存在'.format(table))
        return

    print("开始同步{0}".format(table))
    # Argument list instead of a concatenated shell string: no shell quoting
    # or injection issues. $DATAX_HOME is expanded from this process's
    # environment.
    command = [
        'python',
        os.path.expandvars('$DATAX_HOME/bin/datax.py'),
        '--jvm=-Xms2G -Xmx2G',
        '-p', '-Dtable=' + str(table),
        os.path.expandvars('$DATAX_HOME/job/' + name + '.json'),
    ]
    rc = subprocess.call(command)
    if rc == 0:
        print("同步{0}完成".format(table))
    else:
        # Do not report success when DataX exited with an error.
        print("同步{0}失败,退出码:{1}".format(table, rc))
# Start
def start_syncer():
    """Run every configured sync in its own thread and wait for all of them."""
    workers = [
        syncerThread(1, "tb_scene_site_request_event_log"),  # Plan A request log
    ]
    # Start all workers first, then block until each one has finished.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('同步结束')
if __name__ == '__main__':
    # Key name shown in the exit hint depends on the platform.
    exit_key = 'Break' if os.name == 'nt' else 'C'

    # Register start_syncer on a cron trigger with hour='2'.
    scheduler = BlockingScheduler()
    scheduler.add_job(start_syncer, trigger='cron', hour='2')
    print('Press Ctrl+{0} to exit'.format(exit_key))

    try:
        # Blocks here until interrupted.
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
4、相关命令
1)启动
# Launch scene_site_syncer.py in the background; nohup keeps it running after
# the terminal is closed.
# NOTE(review): presumably this is the scheduling script from the previous
# step saved under that file name -- confirm.
nohup python3 scene_site_syncer.py &
2)停止
#!/usr/bin/python
#coding=utf-8
import sys,os
def kill_crawler(id):
    """Force-kill running ``scene_site_syncer*`` processes found via ``ps``.

    Scans the output of ``ps -ef | grep python``. A process is selected when
    the base name of the last field on its line starts with
    ``scene_site_syncer``; it is killed with ``kill -9`` when its task id
    equals ``id`` or when ``id`` is ``'0'``.

    Args:
        id: Task id to stop (string), or ``'0'`` to stop every match.
    """
    pipe = os.popen('ps -ef | grep python')
    try:
        lines = pipe.readlines()
    finally:
        # The pipe was never closed before.
        pipe.close()

    for line in lines:
        fields = line.split()
        # Skip blank or truncated lines that lack a PID column.
        if len(fields) < 2:
            continue
        pid = fields[1]
        # basename() lets a script started by path (e.g. /opt/x/scene_site_syncer.py)
        # match as well.
        name = os.path.basename(fields[-1])
        if not name.startswith('scene_site_syncer'):
            continue
        # NOTE(review): for "scene_site_syncer.py" this slice yields "e_syncer",
        # so only id == '0' can match in practice; the offset looks copied from
        # a script with a different name prefix -- confirm the intended
        # task-id scheme before changing it.
        task_id = name[9:-3]
        if task_id == id or id == '0':
            rc = os.system("kill -9 %d" % int(pid))
            # print() with a single argument behaves the same on Python 2 and 3.
            if rc == 0:
                print("stop \"%s\" success!!" % name)
            else:
                print("stop \"%s\" failed!!" % name)
if __name__ == '__main__':
    # Exactly one argument is expected: the task id, or 0 to stop everything.
    if len(sys.argv) != 2:
        # Parenthesized single-argument print works on both Python 2 and 3.
        print(u'输入要结束的任务编号,0代表停止所有')
        sys.exit()
    id = sys.argv[1]
    kill_crawler(id)