DataX export script: delete a partition's data from MySQL, then write it back

The shell script below first deletes the target date's rows from each destination MySQL table (so a re-run does not duplicate data), then removes any zero-byte files from the HDFS export path, and finally runs the corresponding DataX job to write the HDFS partition into MySQL.

#!/bin/bash

DATAX_HOME=/opt/module/datax

# 1. Make sure at least one argument (all / a table name) was passed in
if [ $# -lt 1 ]
then
  echo "You must pass 'all' or a table name..."
  exit 1
fi
# 2. If a date was passed as the second argument, use it; otherwise default to yesterday
[ "$2" ] && datestr=$2 || datestr=$(date -d '-1 day' +%F)

# The DataX export path must not contain empty files; this function cleans them up
handle_export_path(){
  # 'hadoop fs -ls -R' lists every file under the path; $8 is the file-path column
  for i in $(hadoop fs -ls -R "$1" | awk '{print $8}'); do
    # 'hadoop fs -test -z' exits 0 when the file is zero length
    if hadoop fs -test -z "$i"; then
      echo "$i has size 0, deleting it"
      hadoop fs -rm -r -f "$i"
    fi
  done
}
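# Example call (the date below is illustrative):
#   handle_export_path "/warehouse/ticket/ads/ads_ticket_respond_statistics/dt=2023-08-25"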

# Export data: clear the target date's rows in MySQL, then run the DataX job
export_data() {
  datax_config=$1
  export_dir=$2
  # All remaining arguments are the MySQL tables this job writes to
  tableNames=("${@:3}")
  # For each table, delete the target date's rows first so a re-run
  # does not leave duplicate data behind
  for table in "${tableNames[@]}"
  do
    ssh hadoop102 "mysql -uroot -pmivbAs7Awc -e \"use ticket; delete from ${table} where end_date = '${datestr}'\""
  done

  handle_export_path "$export_dir"
  python "$DATAX_HOME/bin/datax.py" -p"-Dexportdir=$export_dir" "$datax_config"
}
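# Note: datax.py's -p option forwards "-Dkey=value" pairs into the job, so the
# job JSON files are assumed to reference the HDFS path via a ${exportdir}
# placeholder that -Dexportdir=... fills in at run time (see the sketch after
# the script).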

case $1 in
"ads_ticket_respond_statistics")
  export_data "/opt/module/datax/job/export/ticket/ticket.ads_ticket_respond_statistics.json" "/warehouse/ticket/ads/ads_ticket_respond_statistics/dt=${datestr}" "ads_ticket_respond_statistics"
  ;;
"ads_ticket_subpar_statistics")
  export_data "/opt/module/datax/job/export/ticket/ticket.ads_ticket_subpar_statistics.json" "/warehouse/ticket/ads/ads_ticket_subpar_statistics/dt=${datestr}" "ads_ticket_subpar_statistics"
  ;;
"all")
  export_data "/opt/module/datax/job/export/ticket/ticket.ads_ticket_respond_statistics.json" "/warehouse/ticket/ads/ads_ticket_respond_statistics/dt=${datestr}" "ads_ticket_respond_statistics"
  export_data "/opt/module/datax/job/export/ticket/ticket.ads_ticket_subpar_statistics.json" "/warehouse/ticket/ads/ads_ticket_subpar_statistics/dt=${datestr}" "ads_ticket_subpar_statistics"
  ;;
*)
  echo "Unknown target: $1 (expected 'all' or a table name)"
  exit 1
  ;;
esac
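
A minimal invocation sketch (the filename datax_export.sh is an assumption; use whatever name the script is saved under):

# Export yesterday's partitions for every table
bash datax_export.sh all

# Export a single table for an explicit date
bash datax_export.sh ads_ticket_respond_statistics 2023-08-20

For reference, here is a hedged sketch of what one of the job files (e.g. ticket.ads_ticket_respond_statistics.json) might look like. The defaultFS host/port, fileType, field delimiter, column lists, and JDBC URL are assumptions, not taken from the original post; what matters is that the ${exportdir} placeholder in the reader path matches the -Dexportdir parameter the script passes:

{
  "job": {
    "setting": { "speed": { "channel": 1 } },
    "content": [{
      "reader": {
        "name": "hdfsreader",
        "parameter": {
          "defaultFS": "hdfs://hadoop102:8020",
          "path": "${exportdir}",
          "fileType": "text",
          "fieldDelimiter": "\t",
          "column": ["*"]
        }
      },
      "writer": {
        "name": "mysqlwriter",
        "parameter": {
          "username": "root",
          "password": "mivbAs7Awc",
          "writeMode": "insert",
          "column": ["*"],
          "connection": [{
            "jdbcUrl": "jdbc:mysql://hadoop102:3306/ticket",
            "table": ["ads_ticket_respond_statistics"]
          }]
        }
      }
    }]
  }
}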
