数据仓库构建方法论(五):FTP服务器上离线文件的校验模板

本文总结了以往项目中从FTP服务器抽取离线文本数据时使用的校验模板,涉及对文件大小、记录条数、加密、列数等方面的校验规则。

#!/bin/bash
###################################################
# Environment setup: locale, working directories, config files, log prefixes.
## author=yzg
#
# NOTE(review): later sections invoke ${sms_path}, which is not assigned in
# this section -- confirm where that command is expected to be configured.

export LANG=en_US
# NOTE(review): bash does not expand aliases in non-interactive scripts unless
# expand_aliases is enabled, so this alias is presumably inert here -- confirm.
alias ls='ls --color=auto --time-style=local'

# Directory holding the incoming files, and the transfer command.
src_path="/data/XX/put_file"
ftp_path="/data/XX/do_ftp"

# Unit-id lists consulted by the empty / non-empty file checks.
#rowlen_path="/dataXX/cfg/rowlength.cfg"
allownull_path="/data/XX/cfg/allownull.cfg"
isnull_path="/data/XX/cfg/isnull.cfg"

# Log-file prefixes, one stamped with the current day and one with the month.
log_file="/data/XX/log/ftp_d_$(date +%Y%m%d)"
log_file_mon="/data/XX/log/ftp_m_$(date +%Y%m)"

#------------------------------------------------------------
# Exactly one positional argument is required; anything else is reported on
# stdout with a timestamp and the script stops with status 1.
if [ "$#" -ne 1 ]; then
  echo "$(date +%Y%m%d:%T)  not one command(unit id)!"
  exit 1
fi
# Byte count of the argument as seen by `wc -c`, which includes the newline
# that echo appends.
# Example arguments: 21001_20180728 , 21002_201807
para_len=$(echo $1 | wc -c)

# A count of 14 or less selects the month-stamped log files; anything longer
# selects the day-stamped ones.
if (( para_len <= 14 )); then
  done_log_file="${log_file_mon}_done.log"
  err_log_file="${log_file_mon}_err.log"
  info_log_file="${log_file_mon}_info.log"
else
  done_log_file="${log_file}_done.log"
  err_log_file="${log_file}_err.log"
  info_log_file="${log_file}_info.log"
fi

# ---------------------------------------------------------------------------
# Validate, compress and register every delivery in ${src_path} matching $1.
#
# A delivery is a group of part files sharing one prefix, for example
#   s_20008_OLN_21001_20180729_00_000.csv
# Split on "_" and counted from the end, the name carries
#   ..._<unit id>_<period>_<sequence>_<part>.csv
# The prefix (name minus its last 8 characters, "_000.csv") identifies the
# delivery.
#
# For each delivery:
#   1. reject a prefix already present in ${done_log_file}
#   2. check every part file: size below 2 GiB; an empty file is accepted only
#      when the unit id appears in ${allownull_path}; a non-empty file is
#      rejected when the unit id appears in ${isnull_path}
#   3. record line count and MD5 of the raw file, gzip it, and append one
#      fixed-width, CRLF-terminated row to <prefix>.verf
#   4. run the upload hook for each .gz part and for the .verf file
#   5. append the prefix to ${done_log_file}
#
# Exit status:
#   0 success            2 part file >= 2 GiB     3 cannot enter ${src_path}
#   6 duplicate upload   7 empty file not allowed 8 .verf upload failed
#   9 no matching file  10 part upload failed    11 file must be empty
#  12 gzip failed
# ---------------------------------------------------------------------------

# Append one timestamped line to the error log.
log_err() {
  printf '%s  %s\n' "$(date +%Y%m%d:%T)" "$1" >>"${err_log_file}"
}

# Send an alert through the ${sms_path} command when one is configured.
# Without this guard an unset sms_path makes the shell try to execute the
# message text itself as a command.
notify() {
  if [[ -n "${sms_path:-}" ]]; then
    # Left unquoted on purpose so a value such as "cmd --flag" still splits.
    # shellcheck disable=SC2086
    ${sms_path} "$1"
  else
    printf 'sms_path is not set, alert not sent: %s\n' "$1" >&2
  fi
}

# Log and alert with the same message, then stop with the given status.
#   $1 - exit status, $2 - message
fail() {
  log_err "$2"
  notify "$2"
  exit "$1"
}

# Upload hook. The real transfer command is disabled in this template, so the
# hook only prints a placeholder and succeeds; enable the commented line to
# make the hook return the transfer's status.
send_file() {
  echo "hello"
  #${ftp_path} "$1"
}

if ! cd "${src_path}"; then
  echo "change file failed!"
  echo "change file failed!" >>"${err_log_file}"
  # Report the failure to the caller instead of finishing with status 0.
  exit 3
fi

# First part (…000.csv) of every delivery matching the argument, e.g.
#   a_20008_21001_20180728_00_000.csv
# Kept in an array so that several matches are processed one by one; a single
# `[ -f ... ]` over the unquoted list breaks as soon as two files match.
first_parts=( *"$1"*000.csv )
echo "${first_parts[*]}"

# With no match the pattern stays literal and is not an existing file.
if [[ ! -f "${first_parts[0]}" ]]; then
  fail 9 "$1 文件未找到!"
fi

for i in "${first_parts[@]}"; do
  [[ -f "$i" ]] || continue

  # Delivery prefix: file name without its last 8 characters ("_000.csv").
  n=${i%????????}
  echo "$n"

  # A prefix already listed in the done log must not be processed again.
  # -F: the prefix is a literal string, not a regular expression.
  if [[ -f "${done_log_file}" ]] && grep -qF -- "$n" "${done_log_file}"; then
    fail 6 "$n 重复上传!"
  fi

  # Start from a clean verification file for this delivery.
  if [[ -f "$n.verf" ]]; then
    rm -f -- "$n.verf"
  fi

  # Unit id (4th field from the end) and period (3rd field from the end).
  v_unit=$(printf '%s\n' "$i" | awk -F_ '{print $(NF-3)}')
  t=$(printf '%s\n' "$i" | awk -F_ '{print $(NF-2)}')

  # Every part file of this delivery.
  for l in "$n"*.csv; do
    [[ -f "$l" ]] || continue

    # Size in bytes, read directly rather than parsed out of `ls -l`.
    r=$(( $(wc -c <"$l") ))

    if (( r >= 2147483648 )); then
      fail 2 "$l 大小超过2G!"
    fi

    if (( r == 0 )); then
      # Empty files are always logged, but only fatal for units that are not
      # listed in the allow-empty configuration.
      log_err "$l 文件大小为0!"
      if ! grep -qF -- "${v_unit}" "${allownull_path}"; then
        notify "$l 文件大小为0!"
        exit 7
      fi
    elif grep -qF -- "${v_unit}" "${isnull_path}"; then
      # Units listed in the must-be-empty configuration may not deliver data.
      fail 11 "$l 文件大小为非空!"
    fi

    # Line count and MD5 are taken from the uncompressed file.
    num=$(( $(wc -l <"$l") ))
    md=$(md5sum <"$l" | awk '{print $1}')

    # Stop here rather than describe a .gz that was never produced.
    if ! gzip -f -- "$l"; then
      fail 12 "$l 压缩失败!"
    fi

    # Size of the compressed file.
    big=$(( $(wc -c <"$l.gz") ))

    # Columns: name(50) size(20) lines(20) period(12) md5(32), CRLF line end.
    # The format ends right after the md5 column; a trailing lone "%" is not a
    # valid conversion.
    printf '%-50s%-20s%-20s%-12s%-32s\r\n' \
      "$l.gz" "$big" "$num" "$t" "$md" >>"$n.verf"
  done

  # Upload the compressed parts, then the verification file.
  for l in "$n"*.csv.gz; do
    [[ -f "$l" ]] || continue
    send_file "$l" || exit 10
  done

  if send_file "$n.verf"; then
    printf '%s  %s\n' "$(date +%Y%m%d:%T)" "$n" >>"${done_log_file}"
  else
    fail 8 "$n 文件ftp中断!"
  fi
done

exit 0

猜你喜欢

转载自blog.csdn.net/yezonggang/article/details/109257817