Report Design and Application in a Data Warehouse Pipeline

Copyright notice: this is the author's original article; do not repost without permission. https://blog.csdn.net/shuimofengyang/article/details/84523670

1. Background: first use sqoop to pull a full extract from MySQL, then pull daily increments and merge them into the full table; together these form the ODS layer. Business processing over ODS produces the DWD layer, and the aggregated results land in the DM layer; finally, sqoop exports the report back to MySQL.
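The stages below are independent shell scripts. A minimal driver sketch for chaining them daily is shown here, assuming the stage scripts are saved under the hypothetical names ods_incr.sh, dwd_build.sh, dm_report.sh and dm_export.sh:

#!/bin/bash
# Minimal pipeline driver (sketch). The script names are placeholders for the
# stage scripts listed in this article.
set -e                                           # stop at the first failed stage

BIZ_DATE=${1:-$(date -d yesterday +'%Y-%m-%d')}  # default: process yesterday

bash ods_incr.sh  "$BIZ_DATE"   # ODS: incremental sqoop pull, merged into the full table
bash dwd_build.sh "$BIZ_DATE"   # DWD: business processing in Hive
bash dm_report.sh "$BIZ_DATE"   # DM:  assemble the report table
bash dm_export.sh "$BIZ_DATE"   # export: sqoop the report back to MySQL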

ODS layer:

#!/bin/bash

# ********************************************************************************
# Program name:    online_tab_user_order
# Description:     full sqoop extract of the MySQL table online_tab_user_order into Hive
# Input params:
#
# Input resources:
# Output resources:
#
# Intermediate resources:
# Author:          csq
# Created:
# Version notes:
# Modified by:
# Modified on:
# Reason:
# Version notes:
#
# ********************************************************************************


VC_DBLIST='10.68.21.92,3306,hue,"xxx",user_online'
VC_DBLIST1='10.68.21.92,3306,hue,"xxx",user_online_other'

HIVE_SERVER='10.68.25.198:10000'
export HADOOP_USER_NAME=hue   # your Hue page account (i.e. the Hadoop cluster account)
dblist=${VC_DBLIST}
dbhost=`echo "${dblist}" |awk -F, '{print $1}'`
dbport=`echo "${dblist}" |awk -F, '{print $2}'`
dbuser=`echo "${dblist}" |awk -F, '{print $3}'`
dbpw=`echo "${dblist}" |awk -F, '{print $4}'`
dbname=`echo "${dblist}" |awk -F, '{print $5}'`

if [ $# -eq 0 ];
  then
    p_in_time_str=`date -d today +'%Y-%m-%d'' 00:00:00'`
    p_in_time_end=$p_in_time_str
    
  elif [ $# -eq 1 ];
    then
      p_in_time_str=$1
      p_in_time_end=$1 
  elif [ $# -eq 2 ];
    then 
      p_in_time_str=$1
      p_in_time_end=$2
  else 
    p_in_time_str=$1
    p_in_time_end=$2
fi
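The argument handling above is shared by every script in this pipeline: no arguments means "today", one argument is a single day, two arguments are a date range. Illustrative invocations, assuming the script is saved as ods_full.sh (a hypothetical name):

bash ods_full.sh                          # process today
bash ods_full.sh 2018-11-20               # process one day
bash ods_full.sh 2018-11-20 2018-11-25    # process a range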

vc_stat_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_start=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_ed=`date -d "$p_in_time_end next day" +'%Y-%m-%d'' 00:00:00'`

vi_load_st=`date -d "$p_in_time_str" +'%Y%m%d'`
vi_load_ed=`date -d "$p_in_time_end" +'%Y%m%d'`
vc_load_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`

vi_stat_st=`date -d "$p_in_time_str 1 day ago" +'%Y-%m-%d'' 00:00:00'`
vi_stat_ed=`date -d "$p_in_time_end" +'%Y-%m-%d'' 00:00:00'`
vi_stat=`date -d "$p_in_time_str  1 day ago" +'%Y%m%d'`

vi_part_drop=`date -d "$p_in_time_str 1080 day ago" +'%Y%m%d'`

echo $p_in_time_str","$p_in_time_end","$vc_stat_st","$vc_stat_ed","$vi_stat_st","$vi_stat_ed

sqoop_time=`date -d today +'%Y-%m-%d'`
qt=`date -d "2 days ago" +'%Y-%m-%d'' 24:00:00'`
ye=`date -d yesterday +'%Y-%m-%d'' 24:00:00'`

{ # Create the table first. This is optional -- sqoop can map the MySQL schema to
  # Hive by itself -- but pre-creating the table lets you choose the column types.
    beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME}  -e "
    drop table online_ods.online_all_tab_user_order;
    create table online_ods.online_all_tab_user_order(
     order_id string,
  brand_name string,
  channel_name string,
  county_name string,
  create_date string,
  customer_type_name string,
  customer_type_name_lv1 string,
  des_branch_name string,
  name_city string,
  name_province string,
  price string,
  product_mode_name string,
  product_name string,
  product_spec_name string,
  product_type_name string,
  quantity string,
  report_date string,
  salesmoney string,
  shop_id string,
  shop_name string,
  shopper_addr string,
  shopper_name string,
  shopper_phone string,
  subcompany_name string,
  user_id string,
  coupons1 string,
  coupons2 string,
  coupons3 string,
  confirm_date string,
  work_create_date string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\n' STORED AS TEXTFILE;"
} && { # The MySQL side is sharded into ten tables, so each shard is pulled in a loop.
for table in tab_user_order_0 tab_user_order_1 tab_user_order_2 tab_user_order_3 tab_user_order_4 tab_user_order_5 tab_user_order_6 tab_user_order_7 tab_user_order_8 tab_user_order_9
do
{
# Notes: a "broken" command usually means more than one space between the -- options.
# If the table appears under the warehouse directory but not in the Hue page, you are
# probably missing --hive-import. The \$CONDITIONS keyword must never be omitted.
# --target-dir: sqoop first lands the data in HDFS and then loads it into Hive; this
# is that temporary HDFS staging location. If you point it at the warehouse directory
# itself, you will find that only the last shard table's data survives.
sudo -u hdfs sqoop import \
  --connect jdbc:mysql://${dbhost}:${dbport}/${dbname} \
  --username ${dbuser} --password ${dbpw} \
  --hive-drop-import-delims \
  --lines-terminated-by '\n' --fields-terminated-by '\001' \
  --null-string '\\N' --null-non-string '\\N' \
  --hive-import --num-mappers 1 \
  --query "SELECT * FROM $table WHERE \$CONDITIONS" \
  --target-dir /tmp/hive-root/ --delete-target-dir \
  --hive-table online_ods.online_all_tab_user_order


time=`date "+%Y-%m-%d %H:%M:%S"`
echo $table $time "is done"
echo "--------------------------finish----------------------------------"
}

done
}
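After the full load, it is worth checking that all ten shards actually arrived before building anything on top; a minimal sanity check, reusing the connection variables above:

# Compare this count against the sum of the ten MySQL shard counts.
beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME} \
  -e "SELECT count(*) FROM online_ods.online_all_tab_user_order;"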

# Incremental extraction

#!/bin/bash

# ********************************************************************************
# Program name:    online_tab_user_order
# Description:     incremental sqoop extract of the MySQL table online_tab_user_order into Hive
# Input params:
#
# Input resources:
# Output resources:
#
# Intermediate resources:
# Author:          csq
# Created:
# Version notes:
# Modified by:
# Modified on:
# Reason:
# Version notes:
#
# ********************************************************************************
# Incremental extract of the MySQL table online_tab_user_order into Hive via sqoop

VC_DBLIST='10.68.21.92,3306,hue,"xxx",user_online'
VC_DBLIST1='10.68.21.92,3306,hue,"xxxx",user_online_other'
export HADOOP_USER_NAME=
HIVE_SERVER='10.68.25.198:10000'

dblist=${VC_DBLIST}
dbhost=`echo "${dblist}" |awk -F, '{print $1}'`
dbport=`echo "${dblist}" |awk -F, '{print $2}'`
dbuser=`echo "${dblist}" |awk -F, '{print $3}'`
dbpw=`echo "${dblist}" |awk -F, '{print $4}'`
dbname=`echo "${dblist}" |awk -F, '{print $5}'`

if [ $# -eq 0 ];
  then
    p_in_time_str=`date -d today +'%Y-%m-%d'' 00:00:00'`
    p_in_time_end=$p_in_time_str
    
  elif [ $# -eq 1 ];
    then
      p_in_time_str=$1
      p_in_time_end=$1 
  elif [ $# -eq 2 ];
    then 
      p_in_time_str=$1
      p_in_time_end=$2
  else 
    p_in_time_str=$1
    p_in_time_end=$2
fi

vc_stat_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_start=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_ed=`date -d "$p_in_time_end next day" +'%Y-%m-%d'' 00:00:00'`

vi_load_st=`date -d "$p_in_time_str" +'%Y%m%d'`
vi_load_ed=`date -d "$p_in_time_end" +'%Y%m%d'`
vc_load_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`

vi_stat_st=`date -d "$p_in_time_str 1 day ago" +'%Y-%m-%d'' 00:00:00'`
vi_stat_ed=`date -d "$p_in_time_end" +'%Y-%m-%d'' 00:00:00'`
vi_stat=`date -d "$p_in_time_str  1 day ago" +'%Y%m%d'`

vi_part_drop=`date -d "$p_in_time_str 1080 day ago" +'%Y%m%d'`

echo $p_in_time_str","$p_in_time_end","$vc_stat_st","$vc_stat_ed","$vi_stat_st","$vi_stat_ed

sqoop_time=`date -d today +'%Y-%m-%d'`
qt=`date -d "2 days ago" +'%Y-%m-%d'' 24:00:00'`
ye=`date -d yesterday +'%Y-%m-%d'' 24:00:00'`

{
    beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME}  -e "
    drop table online_ods.online_tab_user_order; -- drop first: a fresh increment is pulled every day
    create table online_ods.online_tab_user_order(
     order_id string,
  brand_name string,
  channel_name string,
  county_name string,
  create_date string,
  customer_type_name string,
  customer_type_name_lv1 string,
  des_branch_name string,
  name_city string,
  name_province string,
  price string,
  product_mode_name string,
  product_name string,
  product_spec_name string,
  product_type_name string,
  quantity string,
  report_date string,
  salesmoney string,
  shop_id string,
  shop_name string,
  shopper_addr string,
  shopper_name string,
  shopper_phone string,
  subcompany_name string,
  user_id string,
  coupons1 string,
  coupons2 string,
  coupons3 string,
  confirm_date string,
  work_create_date string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\n' STORED AS TEXTFILE;"
} && {
for table in tab_user_order_0 tab_user_order_1 tab_user_order_2 tab_user_order_3 tab_user_order_4 tab_user_order_5 tab_user_order_6 tab_user_order_7 tab_user_order_8 tab_user_order_9
do
{
sudo -u hdfs sqoop import \
  --connect jdbc:mysql://${dbhost}:${dbport}/${dbname} \
  --username ${dbuser} --password ${dbpw} \
  --hive-drop-import-delims \
  --lines-terminated-by '\n' --fields-terminated-by '\001' \
  --null-string '\\N' --null-non-string '\\N' \
  --hive-import --num-mappers 1 \
  --query "SELECT * FROM $table WHERE work_create_date > '${qt}' AND work_create_date <= '${ye}' AND \$CONDITIONS" \
  --target-dir /tmp/hive-root/ --delete-target-dir \
  --hive-table online_ods.online_tab_user_order


time=`date "+%Y-%m-%d %H:%M:%S"`
echo $table $time "is done"
echo "--------------------------finish----------------------------------"
}

done
} && {
    beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME}  -e "
        set hive.exec.dynamic.partition=true;  
        set hive.exec.dynamic.partition.mode=nonstrict;
        set hive.optimize.sort.dynamic.partition=false;
        set hive.exec.max.dynamic.partitions.pernode=1000;
        set hive.exec.max.dynamic.partitions=10000;            
INSERT into TABLE online_ods.online_all_tab_user_order
SELECT  
  order_id ,
  brand_name ,
  channel_name ,
  county_name ,
  create_date ,
  customer_type_name ,
  customer_type_name_lv1 ,
  des_branch_name ,
  name_city ,
  name_province ,
  price ,
  product_mode_name ,
  product_name ,
  product_spec_name ,
  product_type_name ,
  quantity ,
  report_date ,
  salesmoney ,
  shop_id ,
  shop_name ,
  shopper_addr ,
  shopper_name ,
  shopper_phone ,
  subcompany_name ,
  user_id ,
  coupons1 ,
  coupons2 ,
  coupons3 ,
  confirm_date ,
  work_create_date 
FROM online_ods.online_tab_user_order;"
}
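Here the increment is cut by hand, with work_create_date bounded by ${qt} and ${ye}. sqoop also has a built-in incremental mode that tracks the high-water mark itself; a sketch of that alternative for a single shard (parameters illustrative, not what this pipeline uses):

# Illustrative only: sqoop's append mode picks up new rows by --check-column.
# In practice --last-value comes from a saved sqoop job, not a hard-coded literal.
sudo -u hdfs sqoop import \
  --connect jdbc:mysql://${dbhost}:${dbport}/${dbname} \
  --username ${dbuser} --password ${dbpw} --num-mappers 1 \
  --table tab_user_order_0 \
  --incremental append --check-column work_create_date --last-value "${qt}" \
  --target-dir /tmp/hive-root/tab_user_order_0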

DWD layer:

#!/bin/bash

# ********************************************************************************
# Program name:    intent users - promotional-campaign user count
# Description:     in the Tlink user-asset management application, the number of
#                  intent users proactively added by sales reps to date
#                  (source: user data center)
# Input params:
#
# Input resources:
# Output resources:
#
# Intermediate resources:
# Author:          csq
# Created:
# Version notes:
# Modified by:
# Modified on:
# Reason:
# Version notes:
#
# ********************************************************************************
# Builds the DWD-layer result table in Hive from the ODS tables

VC_DBLIST='10.68.25.201,3306,admin,"123$#sadY23",user_online'
VC_DBLIST1='10.68.25.201,3306,admin,"123$#sadY23",user_online_other'
HIVE_SERVER='10.68.25.198:10000'
export HADOOP_USER_NAME=chensiqing

dblist=${VC_DBLIST}
dbhost=`echo "${dblist}" |awk -F, '{print $1}'`
dbport=`echo "${dblist}" |awk -F, '{print $2}'`
dbuser=`echo "${dblist}" |awk -F, '{print $3}'`
dbpw=`echo "${dblist}" |awk -F, '{print $4}'`
dbname=`echo "${dblist}" |awk -F, '{print $5}'`

if [ $# -eq 0 ];
  then
    p_in_time_str=`date -d today +'%Y-%m-%d'' 00:00:00'`
    p_in_time_end=$p_in_time_str
    
  elif [ $# -eq 1 ];
    then
      p_in_time_str=$1
      p_in_time_end=$1 
  elif [ $# -eq 2 ];
    then 
      p_in_time_str=$1
      p_in_time_end=$2
  else 
    p_in_time_str=$1
    p_in_time_end=$2
fi

vc_stat_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_start=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_ed=`date -d "$p_in_time_end next day" +'%Y-%m-%d'' 00:00:00'`

vi_load_st=`date -d "$p_in_time_str" +'%Y%m%d'`
vi_load_ed=`date -d "$p_in_time_end" +'%Y%m%d'`
vc_load_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`

vi_stat_st=`date -d "$p_in_time_str 1 day ago" +'%Y-%m-%d'' 00:00:00'`
vi_stat_ed=`date -d "$p_in_time_end" +'%Y-%m-%d'' 00:00:00'`
vi_stat=`date -d "$p_in_time_str  1 day ago" +'%Y%m%d'`

vi_part_drop=`date -d "$p_in_time_str 1080 day ago" +'%Y%m%d'`

echo $p_in_time_str","$p_in_time_end","$vc_stat_st","$vc_stat_ed","$vi_stat_st","$vi_stat_ed

createDate=`date -d today +'%Y-%m-%d'`
qt=`date -d "2 days ago" +'%Y-%m-%d'' 24:00:00'`
ye=`date -d yesterday +'%Y-%m-%d'' 24:00:00'`

{
    beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME}  -e "
    drop table online_dw.actionUserResult_tab;
create TABLE online_dw.actionUserResult_tab(
 subcompany_name string,
 number int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\n' STORED AS TEXTFILE;

INSERT into TABLE online_dw.actionUserResult_tab 
select w.sn,count(w.intention_id)  from
(
SELECT   distinct s.subcompany_name sn,s.user_id ,s.user_name,ia.intention_id
FROM online_ods.online_all_crm_user_shop s
left JOIN 
        (
            select i.user_id,i.intention_id FROM online_ods.online_all_tab_user_intention i
            where i.intention_source ='2' and i.user_stage in ('0','2')
        ) ia on ia.user_id=s.user_id
WHERE s.subcompany_name LIKE '%分公司%'

) w
group by w.sn;"

time=`date "+%Y-%m-%d %H:%M:%S"`
echo $time "table online_dw.actionUserResult_tab drop/create/insert is done"
echo "--------------------------finish----------------------------------"
}
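Before wiring this table into the report, a quick illustrative check that the per-subcompany counts look plausible:

beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME} \
  -e "SELECT * FROM online_dw.actionUserResult_tab ORDER BY number DESC LIMIT 10;"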

# Combine all the result tables into the final report

#!/bin/bash

# ********************************************************************************
# Program name:    repeat-purchase users - cumulative purchases
# Description:     number of users with two or more orders to date
# Input params:
#
# Input resources:
# Output resources:
#
# Intermediate resources:
# Author:          csq
# Created:
# Version notes:
# Modified by:
# Modified on:
# Reason:
# Version notes:
#
# ********************************************************************************
# Merges the DW result tables into the DM-layer report table in Hive

VC_DBLIST='10.68.25.201,3306,admin,"XXXX",user_online'
VC_DBLIST1='10.68.25.201,3306,admin,"XXXXX",user_online_other'
HIVE_SERVER='10.68.25.198:10000'
export HADOOP_USER_NAME=

dblist=${VC_DBLIST}
dbhost=`echo "${dblist}" |awk -F, '{print $1}'`
dbport=`echo "${dblist}" |awk -F, '{print $2}'`
dbuser=`echo "${dblist}" |awk -F, '{print $3}'`
dbpw=`echo "${dblist}" |awk -F, '{print $4}'`
dbname=`echo "${dblist}" |awk -F, '{print $5}'`

if [ $# -eq 0 ];
  then
    p_in_time_str=`date -d today +'%Y-%m-%d'' 00:00:00'`
    p_in_time_end=$p_in_time_str
    
  elif [ $# -eq 1 ];
    then
      p_in_time_str=$1
      p_in_time_end=$1 
  elif [ $# -eq 2 ];
    then 
      p_in_time_str=$1
      p_in_time_end=$2
  else 
    p_in_time_str=$1
    p_in_time_end=$2
fi

vc_stat_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_start=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_ed=`date -d "$p_in_time_end next day" +'%Y-%m-%d'' 00:00:00'`

vi_load_st=`date -d "$p_in_time_str" +'%Y%m%d'`
vi_load_ed=`date -d "$p_in_time_end" +'%Y%m%d'`
vc_load_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`

vi_stat_st=`date -d "$p_in_time_str 1 day ago" +'%Y-%m-%d'' 00:00:00'`
vi_stat_ed=`date -d "$p_in_time_end" +'%Y-%m-%d'' 00:00:00'`
vi_stat=`date -d "$p_in_time_str  1 day ago" +'%Y%m%d'`

vi_part_drop=`date -d "$p_in_time_str 1080 day ago" +'%Y%m%d'`

echo $p_in_time_str","$p_in_time_end","$vc_stat_st","$vc_stat_ed","$vi_stat_st","$vi_stat_ed

createDate=`date -d today +'%Y-%m-%d'`
qt=`date -d "2 days ago" +'%Y-%m-%d'' 24:00:00'`
ye=`date -d yesterday +'%Y-%m-%d'' 24:00:00'`

{
    beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME}  -e "
    drop table online_dm.Report;
create TABLE online_dm.Report(
 subcompany_name string,
 sum_add_user int,
 sum_week_add_user int,
 sum_action_add_user int,
 sum_week_action_add_user int,
 sum_ordered_add_user int,
 sum_week_ordered_add_user int,
 sum_transform_add_user int,
 sum_week_transform_add_user int,
 sum_old_order_add_user int,
 sum_week_old_order_user int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\n' STORED AS TEXTFILE;


-- Join every result table on a.subcompany_name. Chaining the key through the
-- previous left join (b on a, c on b, ...) would propagate NULL keys and drop
-- rows as soon as any one table misses a subcompany.
INSERT into TABLE online_dm.Report
SELECT a.subcompany_name,a.number,b.number,c.number,d.number,e.number,f.number,g.number,h.number,i.number,j.number
from 
online_dw.adduserresult_tab a
left join 
online_dw.addweekuserresult_tab b on a.subcompany_name=b.subcompany_name
left join 
 online_dw.actionuserresult_tab c on a.subcompany_name=c.subcompany_name
left join 
 online_dw.actionweekuserresult_tab d on a.subcompany_name=d.subcompany_name
left join 
 online_dw.orderedadduserresult_tab e on a.subcompany_name=e.subcompany_name
left join 
 online_dw.orderedaddweekuserresult_tab f on a.subcompany_name=f.subcompany_name
left join 
online_dw.transformadduserresult_tab g on a.subcompany_name=g.subcompany_name
left join 
online_dw.transformweekadduserresult_tab h on a.subcompany_name=h.subcompany_name
left join 
online_dw.oldorderedthantwo i on a.subcompany_name=i.subcompany_name
left join 
online_dw.oldweekorderedthantwo j on a.subcompany_name=j.subcompany_name ;"

time=`date "+%Y-%m-%d %H:%M:%S"`
echo $time "online_dm.Report drop/create/insert is done"
echo "--------------------------finish----------------------------------"
}
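Because the report is assembled with left joins, any subcompany missing from one of the DW tables shows up as NULL columns; a quick illustrative check after the load:

beeline -u jdbc:hive2://${HIVE_SERVER} -n ${HADOOP_USER_NAME} \
  -e "SELECT count(*) FROM online_dm.Report WHERE sum_week_old_order_user IS NULL;"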

DM layer (export the report to MySQL):

#!/bin/bash

# ********************************************************************************
# Program name:    export the report to MySQL
# Description:     sqoop-export the DM-layer report table from Hive to MySQL
# Input params:
#
# Input resources:
# Output resources:
#
# Intermediate resources:
# Author:          csq
# Created:
# Version notes:
# Modified by:
# Modified on:
# Reason:
# Version notes:
#
# ********************************************************************************


VC_DBLIST='10.68.25.201,3306,admin,"xxxx",user_online'
VC_DBLIST1='10.68.25.201,3306,admin,"xxxxx",user_online_other'
HIVE_SERVER='10.68.25.198:10000'

export HADOOP_USER_NAME=

dblist=${VC_DBLIST}
dbhost=`echo "${dblist}" |awk -F, '{print $1}'`
dbport=`echo "${dblist}" |awk -F, '{print $2}'`
dbuser=`echo "${dblist}" |awk -F, '{print $3}'`
dbpw=`echo "${dblist}" |awk -F, '{print $4}'`
dbname=`echo "${dblist}" |awk -F, '{print $5}'`

if [ $# -eq 0 ];
  then
    p_in_time_str=`date -d today +'%Y-%m-%d'' 00:00:00'`
    p_in_time_end=$p_in_time_str
    
  elif [ $# -eq 1 ];
    then
      p_in_time_str=$1
      p_in_time_end=$1 
  elif [ $# -eq 2 ];  
    then 
      p_in_time_str=$1
      p_in_time_end=$2
  else 
    p_in_time_str=$1
    p_in_time_end=$2
fi

vc_stat_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_start=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`
vc_stat_ed=`date -d "$p_in_time_end next day" +'%Y-%m-%d'' 00:00:00'`

vi_load_st=`date -d "$p_in_time_str" +'%Y%m%d'`
vi_load_ed=`date -d "$p_in_time_end" +'%Y%m%d'`
vc_load_st=`date -d "$p_in_time_str" +'%Y-%m-%d'' 00:00:00'`

vi_stat_st=`date -d "$p_in_time_str 1 day ago" +'%Y-%m-%d'' 00:00:00'`
vi_stat_ed=`date -d "$p_in_time_end" +'%Y-%m-%d'' 00:00:00'`
vi_stat=`date -d "$p_in_time_str  1 day ago" +'%Y%m%d'`

vi_part_drop=`date -d "$p_in_time_str 1080 day ago" +'%Y%m%d'`

echo $p_in_time_str","$p_in_time_end","$vc_stat_st","$vc_stat_ed","$vi_stat_st","$vi_stat_ed

sqoop_time=`date -d today +'%Y-%m-%d'`
qt=`date -d "2 days ago" +'%Y-%m-%d'' 24:00:00'`
ye=`date -d yesterday +'%Y-%m-%d'' 24:00:00'`

{
# Export the generated report to MySQL with sqoop. Append
# ?useUnicode=true&characterEncoding=utf-8 to the JDBC URL, or Chinese text comes
# out garbled, and create the target table in MySQL beforehand. Also TRUNCATE the
# MySQL table before every export -- sqoop export appends, it cannot overwrite.
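# Hypothetical truncate step (not in the original script): clear the target table
# so repeated runs do not accumulate duplicate rows. Assumes the mysql client is
# installed on this host.
mysql -h ${dbhost} -P ${dbport} -u ${dbuser} -p${dbpw} ${dbname} -e "TRUNCATE TABLE report;"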
sudo -u hdfs sqoop export \
  --connect "jdbc:mysql://${dbhost}:${dbport}/${dbname}?useUnicode=true&characterEncoding=utf-8" \
  --username ${dbuser} --password ${dbpw} --num-mappers 1 \
  --export-dir /user/hive/warehouse/online_dm.db/report --table report \
  --input-fields-terminated-by '\001' \
  --input-null-string '\\N' --input-null-non-string '\\N'
time=`date "+%Y-%m-%d %H:%M:%S"`
echo " report at " $time "is done"
echo "--------------------------finish----------------------------------"

}
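To run the whole chain every day, the stage scripts can be driven from cron; an illustrative crontab entry (paths and script names are placeholders):

# Daily at 02:00: ODS increment, DWD build, DM report, MySQL export; a failed
# stage short-circuits the rest.
0 2 * * * cd /opt/etl && bash ods_incr.sh && bash dwd_build.sh && bash dm_report.sh && bash dm_export.sh >> /var/log/etl/report.log 2>&1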
