Data warehouse construction methodology (5): a verification template for offline files on an FTP server

This post summarizes a verification template for offline text-data extracts delivered through FTP servers, based on projects I have worked on in the past. It involves verification rules for file size, number of records, encryption, number of columns, and so on.

#!/bin/bash
###################################################
# Environment setup: locale, ls alias, and the fixed
# paths used by the rest of the script.
## author=yzg

export LANG=en_US
alias ls='ls --color=auto --time-style=local'

# Common root shared by every path below.
data_root="/data/XX"

# Directory holding the files to verify.
src_path="${data_root}/put_file"

# Log-file prefixes: one stamped with today's date (YYYYMMDD),
# one stamped with the current month (YYYYMM).
log_file="${data_root}/log/ftp_d_$(date +%Y%m%d)"
log_file_mon="${data_root}/log/ftp_m_$(date +%Y%m)"

# Path referenced by the (currently commented-out) transfer steps.
ftp_path="${data_root}/do_ftp"

#rowlen_path="/dataXX/cfg/rowlength.cfg"
# Configuration files consulted when a data file is empty / non-empty.
allownull_path="${data_root}/cfg/allownull.cfg"
isnull_path="${data_root}/cfg/isnull.cfg"

#------------------------------------------------------------
# Validate the command line and choose the log files.
#
# Exactly one argument is required; otherwise a message is printed
# and the script exits with status 1.
# Example arguments: 21001_20180728, 21002_201807
if [ "$#" -ne 1 ]; then
  echo "$(date +%Y%m%d:%T)  not one command(unit id)!"
  exit 1
fi

# para_len keeps its historical value: the argument length plus one
# (the old `echo $1 | wc -c` also counted echo's trailing newline).
# ${#1} avoids word splitting / glob expansion of the unquoted argument.
para_len=$(( ${#1} + 1 ))

# Arguments longer than 13 characters (e.g. 21001_20180728) log under
# ${log_file}; shorter ones (e.g. 21002_201807) under ${log_file_mon}.
if [ "${para_len}" -gt 14 ]; then
  log_prefix=${log_file}
else
  log_prefix=${log_file_mon}
fi

done_log_file="${log_prefix}_done.log"
err_log_file="${log_prefix}_err.log"
info_log_file="${log_prefix}_info.log"

#------------------------------------------------------------
# Main pass: verify, compress and describe the uploaded files.
#
# For every file in ${src_path} matching *<arg>*000.csv:
#   - derive the version id (file name minus its last 8 characters)
#   - refuse versions already listed in ${done_log_file}
#   - for each "<version>*.csv" file: check the size rules, count lines,
#     take the md5, gzip it and append a fixed-width line to
#     "<version>.verf"
#   - run the (placeholder) transfer steps and record the version in
#     ${done_log_file}
#
# Exit codes: 0 success, 2 file >= 2G, 3 cannot cd to ${src_path},
#   6 duplicate upload, 7 empty file not allowed, 8 .verf transfer failed,
#   9 no matching file, 10 data-file transfer failed,
#   11 file must be empty but is not, 12 gzip failed.

# Print the timestamp used as the prefix of every log line.
log_ts() { date +%Y%m%d:%T; }

# Append "<timestamp>  <message>" to the error log.
log_err() { printf '%s  %s\n' "$(log_ts)" "$1" >>"${err_log_file}"; }

# Send an alert through ${sms_path} when it is configured. The variable is
# not set anywhere in this script, so without the guard the message text
# itself would be executed as a command.
notify() {
  if [[ -n "${sms_path:-}" ]]; then
    # Deliberately unquoted: sms_path may carry its own arguments.
    # shellcheck disable=SC2086
    ${sms_path} "$1"
  fi
}

if ! cd "${src_path}"; then
  echo "change file failed!"
  echo "change file failed!" >>"${err_log_file}"
  # Previously fell through and exited 0; report the failure to the caller.
  exit 3
fi

# Example match: a_20008_21001_20180728_00_000.csv
# An array keeps multiple matches intact (the old `[ -f $f ]` broke with
# "too many arguments" as soon as more than one file matched).
lead_files=( *"$1"*000.csv )
echo "${lead_files[*]}"

# With no match the glob stays literal, so the -f test fails here.
if [[ ! -f "${lead_files[0]}" ]]; then
  log_err "$1 文件未找到!"
  notify "$1 文件未找到!"
  exit 9
fi

for i in "${lead_files[@]}"; do
  # Version id of the upload: the file name without its last 8 characters,
  # e.g. s_20008_OLN_08001_20180729_00
  n=${i%????????}
  echo "$n"

  # Reject a version that was already processed (fixed-string, quiet match).
  if [[ -f "${done_log_file}" ]] && grep -qF -- "$n" "${done_log_file}"; then
    log_err "$n 重复上传!"
    notify "$n 重复上传!"
    exit 6
  fi

  # Start from a fresh verification file, e.g.
  # s_20008_OLN_08001_20180729_00.verf
  verf_file="${n}.verf"
  if [[ -f "${verf_file}" ]]; then
    rm -- "${verf_file}"
  fi

  # File name layout: s_20008_OLN_21001_20180729_00_000.csv
  # unit id (21001): 4th field from the end
  v_unit=$(printf '%s\n' "$i" | awk -F_ '{print $(NF-3)}')
  # sequence (00): 2nd field from the end (not used further in this template)
  v_seq=$(printf '%s\n' "$i" | awk -F_ '{print $(NF-1)}')
  # first character of the file name (not used further in this template)
  v_z_type=${i:0:1}
  # period (20180729): 3rd field from the end
  t=$(printf '%s\n' "$i" | awk -F_ '{print $(NF-2)}')

  # Every data file of this version, e.g.
  # s_20008_OLN_21001_20180729_00_000.csv
  for l in "$n"*.csv; do
    [[ -f "$l" ]] || continue

    # Size in bytes, read directly instead of parsing `ls -l`.
    size=$(( $(wc -c <"$l") ))

    # Files of 2G (2147483648 bytes) or more are rejected.
    if (( size >= 2147483648 )); then
      log_err "$l 大小超过2G!"
      notify "$l 大小超过2G!"
      exit 2
    fi

    if (( size == 0 )); then
      # An empty file is always logged; it is only fatal when the unit id
      # is not found in ${allownull_path}.
      log_err "$l 文件大小为0!"
      if ! grep -qF -- "${v_unit}" "${allownull_path}"; then
        notify "$l 文件大小为0!"
        exit 7
      fi
    elif grep -qF -- "${v_unit}" "${isnull_path}"; then
      # Unit ids found in ${isnull_path} must deliver empty files.
      log_err "$l 文件大小为非空!"
      notify "$l 文件大小为非空!"
      exit 11
    fi

    # Number of lines in the file.
    num=$(wc -l <"$l")

    # md5 of the uncompressed file (first word of md5sum's output).
    md=$(md5sum <"$l")
    md=${md%% *}

    # Compress the file; without a .gz there is nothing valid to describe.
    if ! gzip -f -- "$l"; then
      log_err "$l 压缩失败!"
      notify "$l 压缩失败!"
      exit 12
    fi

    # Size of the compressed file.
    big=$(( $(wc -c <"$l.gz") ))

    # Fixed-width record terminated by CRLF. The former awk format ended in
    # a stray '%' before \r\n, which is an invalid conversion; it is dropped.
    printf '%-50s%-20s%-20s%-12s%-32s\r\n' \
      "$l.gz" "$big" "$num" "$t" "$md" >>"${verf_file}"
  done

  # Transfer step for the compressed data files. The real call is commented
  # out in this template; `echo "hello"` is a placeholder, so the status
  # check below cannot fail until the call is enabled.
  for l in "$n"*.csv.gz; do
    echo "hello"
    #${ftp_path} $l
    if [[ $? -ne 0 ]]; then
      exit 10
    fi
  done

  # Transfer step for the verification file (same placeholder scheme).
  echo "hello"
  #${ftp_path} $n".verf"
  if [[ $? -eq 0 ]]; then
    printf '%s  %s\n' "$(log_ts)" "$n" >>"${done_log_file}"
  else
    log_err "$n 文件ftp中断!"
    notify "$n 文件ftp中断!"
    exit 8
  fi
done
exit 0

 

Guess you like

Origin blog.csdn.net/yezonggang/article/details/109257817