datax全流程脚本

#!/bin/bash
# ~/datax/bin/start_datax.sh
# Run a single DataX job given its job-config file; stdout goes to
# $DATAX_HOME/log/<task>.log, stderr to <task>.error, and the last 32
# log lines are echoed so the caller's .out file captures a summary.
# Exit codes: 0 = datax success, 1 = usage/setup error, 102 = datax failure.

. ~/.bashrc

echo "$#"
if [ "$#" -lt 1 ]; then
    echo "need job config file" >&2
    # Was a bare `exit` (status 0), which let `&&`-chained callers proceed
    # on a usage error; exit non-zero instead.
    exit 1
fi

jobname="$1"
# Task name = config file basename without a .xml suffix.
taskname=$(basename "$jobname" .xml)
echo "$jobname,taskname=$taskname"

DATAX_HOME=/home/work/datax
cd "$DATAX_HOME" || exit 1
outputfile="${DATAX_HOME}/log/${taskname}.log"
errorfile="${DATAX_HOME}/log/${taskname}.error"
# Start each run with a fresh log file.
rm -f -- "$outputfile"

python "${DATAX_HOME}/bin/datax.py" "$jobname" >> "$outputfile" 2> "$errorfile"
if [ $? -eq 0 ]; then
    tail -n32 "$outputfile"
    exit 0
else
    tail -n32 "$outputfile"
    exit 102
fi
# (Removed unreachable trailing `tail` / `exit 103`: both branches above exit.)
#!/bin/bash
# core_incr.sh
# Daily incremental pipeline: render per-shard DataX job configs from the
# templates for the window [start_date, end_date), then launch one
# extract-then-load pipeline per shard in the background (staggered 3m apart).
# Usage: core_incr.sh [start_date]   (defaults to yesterday)

. ~/.bashrc

start_date=$(date -d "1 days ago" +%Y-%m-%d)
if [ "$#" -gt 0 ]; then
    start_date="$1"
fi
end_date=$(date -d "${start_date} 1 days " "+%Y-%m-%d")
schema="core"
tbname="t_trans"
echo "${start_date}" "${end_date}"

example="/home/work/datax/job/core/example"
tempdir="/home/work/datax/job/core"
basepath=$(cd "$(dirname "$0")" && pwd)
cd "$basepath" || exit 1

# Render a concrete job config for each of the 8 shards by substituting the
# date placeholders in the shard's template.
for ID in 1 2 3 4 5 6 7 8; do
    jsonfile="${tempdir}/${schema}_${tbname}_${ID}"
    oneexample="${example}/t_trans_example_${ID}"
    sed "s/START_DATE/${start_date}/g;s/END_DATE/${end_date}/g" "$oneexample" > "$jsonfile"
done
echo "${jsonfile} done"

# Launch extract (start_datax.sh) && load (dataxload_core.sh) per shard in the
# background; sleep 3m between launches to stagger load on the source DB.
for ID in 1 2 3 4 5 6 7 8; do
    echo "~/datax/log/${schema}_${tbname}_${ID}.log"
    ~/datax/bin/start_datax.sh "/home/work/datax/job/core/${schema}_${tbname}_${ID}" > "/home/work/daiyuanpei/core_incr_${ID}.out" \
        && bash /home/work/datax/job/dataxload_core.sh core t_trans "p${ID}" >> "/home/work/daiyuanpei/core_incr_${ID}.out" 2>&1 &
    sleep 3m
done
echo "core_incr.sh done"
#!/bin/bash
# /home/work/datax/job/dataxload_core.sh
# Load one extracted shard from the external table into Greenplum via psql.
# On failure, sleep briefly and schedule retry_dataxload_core.sh in the
# background. Always sends a result mail.
# Usage: dataxload_core.sh <schema> <tablename> <hostindex>

. ~/.bashrc
if [ "$#" -lt 3 ]; then
    echo "./loaddata schema tablename hostindex" >&2
    exit 1
fi
export LANG="zh_CN.UTF-8"
export LC_ALL="zh_CN.UTF-8"

schema="$1"
tablename="$2"
hostindex="$3"
# NOTE: the original assignment embedded a stray leading space in this path
# (masked only because the -e test was unquoted); removed.
data_path="/home/work/datax/ext/core/${tablename}_${hostindex}"
basepath=$(cd "$(dirname "$0")" && pwd)
cd "$basepath" || exit 1

if [ ! -e "$data_path" ]; then
    echo "no exist filepath:${data_path}" >&2
    exit 1
fi

LOAD_SQL="insert into ${schema}.${tablename} select to_date(F_create_time::text,'YYYY-MM-DD'),* from pay_data_ext.r_ext_${tablename}_${hostindex};"

# Per-shard temp file: up to 8 instances run concurrently, so a shared
# temp.log would be clobbered by sibling runs.
tempfile="${basepath}/temp_${tablename}_${hostindex}.log"
export PGPASSWORD=************
/usr/bin/psql -h **.***.**.*** -p 5432 -d qq_gp_db -U gp_rd -c "$LOAD_SQL" > "$tempfile" 2>&1
if [ $? -eq 0 ]; then
    logmsg=$(cat "$tempfile")
    echo "load to success ${tablename}:${logmsg}"
    emailTitle="[云上][${schema}.${tablename}_${hostindex}] load success"
    emailContent="datax load to gppay success: ${schema}.${tablename}_${hostindex}: ${logmsg}"
else
    logmsg=$(cat "$tempfile")
    emailTitle="[云上][${schema}.${tablename}_${hostindex}] load failed"
    emailContent="datax load to gppay failed: ${schema}.${tablename}_${hostindex}: ${logmsg}"
    sleep 9s
    # BUG FIX: was {$schema} (literal braces), which passed e.g. "{core}"
    # as the schema argument to the retry script.
    nohup /bin/bash /home/work/daiyuanpei/retry_dataxload_core.sh "${schema}" "${tablename}" "${hostindex}" > "/home/work/daiyuanpei/retry_${tablename}_${hostindex}.out" 2>&1 &
fi
echo "$emailContent" | mail -s "$emailTitle" [email protected] -c "[email protected] [email protected]"
#!/bin/bash
# /home/work/daiyuanpei/retry_dataxload_core.sh
# One-shot retry of the Greenplum load for a shard. On a second failure it
# drops a marker file under ~/daiyuanpei/monitor/ for monitor.sh to pick up.
# Always sends a result mail.
# Usage: retry_dataxload_core.sh <schema> <tablename> <hostindex>

. ~/.bashrc
if [ "$#" -lt 3 ]; then
    echo "./loaddata schema tablename hostindex" >&2
    exit 1
fi
export LANG="zh_CN.UTF-8"
export LC_ALL="zh_CN.UTF-8"

schema="$1"
tablename="$2"
hostindex="$3"
# NOTE: the original assignment embedded a stray leading space in this path
# (masked only because the -e test was unquoted); removed.
data_path="/home/work/datax/ext/core/${tablename}_${hostindex}"
basepath=$(cd "$(dirname "$0")" && pwd)
cd "$basepath" || exit 1

if [ ! -e "$data_path" ]; then
    echo "no exist filepath:${data_path}" >&2
    exit 1
fi

LOAD_SQL="insert into ${schema}.${tablename} select to_date(F_create_time::text,'YYYY-MM-DD'),* from pay_data_ext.r_ext_${tablename}_${hostindex};"

# Per-shard temp file: retries for several shards may run concurrently, so a
# shared temp.log would be clobbered by sibling runs.
tempfile="${basepath}/temp_retry_${tablename}_${hostindex}.log"
export PGPASSWORD=************
/usr/bin/psql -h **.***.**.*** -p 5432 -d qq_gp_db -U gp_rd -c "$LOAD_SQL" > "$tempfile" 2>&1
if [ $? -eq 0 ]; then
    logmsg=$(cat "$tempfile")
    echo "load to success ${tablename}:${logmsg}"
    emailTitle="[重试][${schema}.${tablename}_${hostindex}] load success"
    emailContent="datax load to gppay success: ${schema}.${tablename}_${hostindex}: ${logmsg}"
else
    logmsg=$(cat "$tempfile")
    emailTitle="[重试][${schema}.${tablename}_${hostindex}] load failed"
    emailContent="datax load to gppay failed: ${schema}.${tablename}_${hostindex}: ${logmsg}"
    # Leave a marker for monitor.sh; retries are not attempted again.
    touch "/home/work/daiyuanpei/monitor/${tablename}"
fi
echo "$emailContent" | mail -s "$emailTitle" [email protected] -c "[email protected] [email protected]"
#!/bin/bash
# monitor.sh
# 3 8 * * * (cd /home/work/daiyuanpei; bash ./monitor.sh > ct_monitor.out 2>&1 &)
# Ship the failure-marker files (t_*) left by retry_dataxload_core.sh to the
# remote monitoring host, then clear them locally.

. ~/.bashrc

current_date=$(date +%Y-%m-%d)
echo "$current_date"
basepath=$(cd "$(dirname "$0")" && pwd)
cd "$basepath" || exit 1

BAK=/home/work/daiyuanpei/monitor
if [ -z "$(ls -A "$BAK")" ]; then
    echo "${BAK} is empty"
else
    echo "${BAK} is not empty"
    # BUG FIX: only delete the local markers after a successful copy — the
    # original removed them unconditionally, losing markers when scp failed.
    if /usr/local/bin/scp -i /home/work/.ssh/id_rsa /home/work/daiyuanpei/monitor/t_* work@**.***.**.***:/home/work/daiyuanpei/monitor; then
        echo "file move done"
        rm ./monitor/t_*
        echo "empty folder done"
    else
        echo "scp failed; keeping local marker files" >&2
    fi
fi
{
    "job": {
        "content": [{
            "reader": {
                "name": "mysqlreader",
                "parameter": {
                    "column": [
                        "F_trans_id",
                        "F_enabled",
                        "F_trans_type",
                        "F_payment_mode",
                        "F_pay_type",
                        "F_state",
                        "F_create_time",
                        "F_create_time_ext",
                        "REPLACE(REPLACE(F_end_time, 'null', '1970-01-01 00:00:00'), '0000-00-00 00:00:00', '1970-01-01 00:00:00')",
                        "REPLACE(REPLACE(F_end_time_ext, 'null', '1970-01-01 00:00:00'), '0000-00-00 00:00:00', '1970-01-01 00:00:00')",
                        "REPLACE(REPLACE(F_pay_time, 'null', '1970-01-01 00:00:00'), '0000-00-00 00:00:00', '1970-01-01 00:00:00')",
                        "REPLACE(REPLACE(F_pay_time_ext, 'null', '1970-01-01 00:00:00'), '0000-00-00 00:00:00', '1970-01-01 00:00:00')",
                        "F_modify_time",
                        "F_goods_url",
                        "REPLACE(REPLACE(F_goods_name, '\\n', '\\\\n'), '\\t', '')",
                        "F_product_type"
                    ],
                    "where":"f_modify_time>='2020-03-10' and f_create_time>='2020-03-10' and f_create_time<'2020-03-11' ",
                    "connection":[
                        {"jdbcUrl":["jdbc:mysql://ip:port/mysql_db_00"],"table":["t_trans_00_[0-9]"]},
                        {"jdbcUrl":["jdbc:mysql://ip:port/mysql_db_99"],"table":["t_trans_99_[0-9]"]}
                    ],
                    "mandatoryEncoding":"GBK",
                    "jdbc_encoding":"CP1252",
                    "username": "username",
                    "password": "password"
                }
            },
            "writer": {
                "name": "streamwriter",
                "parameter": {
                "cryptMode": "no",
                "roleType": "",
                "fieldDelimiter": "\t",
                "fileName": "t_trans_p1",
                "groupname": "qq_gp_db",
                "lineDelimiter": "\n",
                "nullFormat": "\\N",
                "path": "/home/work/datax/ext/core",
                "writeMode": "truncate"
                }
            }
        }],
        "setting": {
            "speed": {
                "channel": 5,
                "byte": 20971520
            }
        }
    }
}
发布了31 篇原创文章 · 获赞 9 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/sosemseo/article/details/104803624