Linux backup: deleting files within a specified date range
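The script below runs on a streaming checkpoint node. It computes the last day of each of the past twelve months, uploads the most recent bloom-filter checkpoint directory to HDFS, and then prunes local and HDFS copies that fall outside a 7-day / 30-day retention window, keeping month-end snapshots. The paths (e.g. /home/hadoop/streaming_run_center/tmp/checkpoint/filter) are specific to the original environment.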

#!/usr/bin/env bash

source /etc/profile

echo " *************** start filter ***************  "
# get the last day of each of the past six months (old, commented-out version)
#m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m0}
#m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)
#echo ${m1}
#m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d)
#echo ${m2}
#m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m3}
#m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m4}
#m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m5}
#m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d)
#echo ${m6}

# Get the last day of the current month; the array length is available as ${#m[*]} or ${#m[@]}
m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)
echo "m0 : ${m[0]} month : ${#m[@]}"
for n in $(seq 0 11); do
    m[n+1]=$(date -d "$(date -d "${m[n]}" +%Y%m01) -1 day" +%Y%m%d)
    echo "m$((n+1)) : ${m[n+1]} month : ${#m[*]}"
done
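# Month-end trick used above: "date -d 'month' +%Y%m01" prints the first day of
# next month, so stepping back one day always lands on the last day of the
# current month, whatever its length (GNU date assumed), e.g. in February 2024:
#   date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d   # -> 20240229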

echo " ****** time : " $(date ' +%Y-%m-%d %H:%M:%S ' ) " ****** "

max_date=0
# get the latest file and copy to hdfs
cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
for dir in $(ls -l ./ | awk '/^d/{print $NF}')
do
   if [[ -d $dir && $dir == *_* ]]; then
      f_d=$(echo $dir | cut -d '_' -f 3 | cut -d '.' -f 1)
      if [[ $max_date < $f_d ]]; then
        max_date=$f_d
        max_filter=$dir
      fi
   fi
done
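# The cut pipeline assumes directory names shaped like prefix_tag_YYYYMMDD.suffix,
# with the date in the third '_'-separated field before the first '.', e.g.
# (hypothetical name):
#   echo "bloom_filter_20180630.FILTER" | cut -d '_' -f 3 | cut -d '.' -f 1   # -> 20180630
# Since YYYYMMDD strings are fixed-width, the lexicographic test
# [[ $max_date < $f_d ]] orders them chronologically.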
echo " max date is : " $max_date
echo " max filter is : " $max_filter
pwd
# copy the most recent filter directory to hdfs
hadoop fs -test -e /data/datacenter/run_center_spark_stream/bloom_filters/$max_filter
if [[ $? == 0 ]]; then
    echo "filter already exists : $max_filter"
else
    echo "start hdfs copy"
    echo "****** start time : $(date '+%Y-%m-%d %H:%M:%S') ******"
    hadoop fs -put $max_filter /data/datacenter/run_center_spark_stream/bloom_filters
    echo "****** end time : $(date '+%Y-%m-%d %H:%M:%S') ******"
fi
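# "hadoop fs -test -e PATH" exits 0 when PATH exists and non-zero otherwise, so
# checking $? immediately after it distinguishes "already uploaded" from "needs upload".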

remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)
echo "cutoff date for removing local serialized files : $remove_week"
remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)
echo "cutoff date for removing local files and HDFS filters : $remove_date"

echo " *************** start remove filter ***************  "
for r_dir in $(ls -l ./ | awk '/^d/{print $NF}')
do
   if [[ -d $r_dir && $r_dir == *_* ]]; then
      r_d=$(echo $r_dir | cut -d '_' -f 3 | cut -d '.' -f 1)
      if [[ $r_d < $remove_date ]]; then
          # older than 30 days: month-end snapshots keep only the MAU filter,
          # all other directories are deleted outright
          if [[ ${m[*]} == *$r_d* ]]; then
              cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir
              pwd
              for f_dir in $(ls)
              do
                 if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
                    echo "------ keep mau_filter is: $f_dir"
                 else
                    echo "remove file is: $f_dir"
                    rm -r $f_dir
                 fi
              done
              cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
              pwd
          else
              echo "remove filter_dir is: $r_dir"
              rm -r $r_dir
          fi
      elif [[ $r_d < $remove_week ]]; then
          # 7-30 days old: the three most recent month-end directories keep the MAU
          # filter and lose their other *.FILTER.SER files
          if [[ $r_d == ${m[0]} || $r_d == ${m[1]} || $r_d == ${m[2]} ]]; then
              cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir
              pwd
              for f_dir in $(ls)
              do
                 if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then
                    echo "------ week keep mau_filter is: $f_dir"
                 else
                    if [[ "$f_dir" == *.FILTER.SER ]]; then
                        echo "- last day of month - week remove file is: $f_dir"
                        rm -r $f_dir
                    fi
                 fi
              done
              cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter
              pwd
          else
              echo "week remove filter is: $r_dir"
              rm -r $r_dir/*.FILTER.SER
          fi
      fi
   fi
done
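# Net local retention policy: directories newer than 7 days are untouched; between
# 7 and 30 days only the serialized *.FILTER.SER files are dropped (month-end
# directories keep mau_device_all.FILTER.SER); past 30 days whole directories are
# deleted, except month-end ones, which are reduced to mau_device_all.FILTER.SER.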

echo " =============== start remove hdfs filter ===============  "
# remove bloom filters on HDFS that are older than the cutoff and not month-end dates
for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk '{print $8}')
do
    if [[ $h_filter == *_* ]]; then
        h_date=$(echo $h_filter | cut -d '/' -f 6 | cut -d '_' -f 3 | cut -d '.' -f 1)
#        echo " hdfs date : "$h_date
#        echo " hdfs filter : "$h_filter
        if [[ ${m[*]} == *$h_date* ]]; then
            echo "keep hdfs filter is : $h_filter"
        elif [[ $h_date < $remove_date ]]; then
            echo "remove date is : $h_date"
            echo "remove hdfs filter is : $h_filter"
            hadoop fs -rmr $h_filter
        fi
    fi
    fi
done
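# Note: "hadoop fs -rmr" still works but is deprecated; "hadoop fs -rm -r" is the
# modern equivalent. Filters dated on the last day of one of the past twelve
# months match ${m[*]} above and are therefore kept on HDFS.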

echo " -------------- start tdid ---------------  "
# remove tdid files dated before the 30-day cutoff
cd /home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo
for tdid in $(ls)
do
    if [[ $tdid == *_* ]]; then
        t_d=$(echo $tdid | cut -d '_' -f 2 | cut -d '.' -f 1)
        if [[ $t_d == $max_date || $t_d > $max_date ]]; then
            echo "need copy date : $t_d"
            echo "need copy tdid : $tdid"
            # check whether the tdid already exists on HDFS (upload currently disabled)
#            hadoop fs -test -e jiaojiao/tdid/$tdid
#            if [[ $? == 0 ]]; then
#                echo " tdid already exists, remove it first "
#                hadoop fs -rm jiaojiao/tdid/$tdid
#                hadoop fs -put $tdid jiaojiao/tdid
#            else
#                echo " start copy "
#                hadoop fs -put $tdid jiaojiao/tdid
#            fi
        elif [[ $t_d < $remove_date ]]; then
            echo "remove tdid : $tdid"
            rm $tdid
        fi
    fi
    fi
done
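# tdid file names are assumed to carry the date in the second '_'-separated field,
# i.e. prefix_YYYYMMDD.suffix; a hypothetical example:
#   echo "tdid_20180630.txt" | cut -d '_' -f 2 | cut -d '.' -f 1   # -> 20180630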

#echo " =============== start remove hdfs tdid ===============  "
#for h_tdid in $(hadoop fs -ls jiaojiao/tdid | awk '{print $8}')
#do
#    if [[ $h_tdid == *\_* ]]; then
#        h_date=$(echo $h_tdid | cut -d \_ -f 2 | cut -d \. -f 1)
#        echo $h_date
#        echo $h_tdid
#    fi
#done
