Requirements:
1. Use Sqoop to pull the full daily data volume from SqlServer and store it in Hive, with one Hive partition per day
2. Commonly used Sqoop commands
3. Installing the missing JDBC driver for Sqoop
#!/usr/bin/env bash
################################################################################
# Purpose: full extraction of a relational (SqlServer) table into a
#          date-partitioned Hive table.
# Input:  source table name, target table name
# Output: 1
# Author: Yu Hui
# Created: 2019-01-08
#
#./extractSqlServer.sh -n dbName -s tableName -i hiveTableName -d 2019-01-07 -t a
# script name          database   source tbl   hive table        date
#
# Steps:
#  1. Fetch the SqlServer table structure via sqoop, extracting column names
#     and column types.
#  2. Post-process the result with sed: strip the header/footer rows and all
#     spaces, saving the schema as lines of the form |column|type|.
#  3. Read the schema, map each column type to a Hive type, and build the
#     Hive CREATE TABLE statement.
#  4. Execute the DDL with hive -e.
#  5. Full-extract the table data via sqoop into the target partition.
################################################################################
# NOTE(review): placeholder credentials — presumably replaced per deployment.
username="username"
password="password"
script_path="/home/yh/create_table_script"
# Print an error message to stderr, then show usage and exit with status 1.
error()
{
echo "$@" 1>&2
# BUGFIX: was 'usage_and_exit', which is never defined anywhere in this
# script; the helper defined below is spelled 'usage_andexit'.
usage_andexit 1
}
# Print the command-line synopsis for this script on stdout.
usage()
{
printf '%s\n' "Usage:$PROGRAM [-n RDBname][-s sourceTableName] [-i hive_tableName] [-d PartitionDate] [-t extractType]"
}
# Show the usage synopsis, then terminate with the supplied exit status ($1).
usage_andexit()
{
local status="$1"
usage
exit "$status"
}
# Print a warning message to stderr and increment the global EXITCODE counter.
warning()
{
echo "$@" 1>&2
# Use shell arithmetic instead of forking the external 'expr'; default the
# counter to 0 so an unset EXITCODE does not break the arithmetic.
EXITCODE=$(( ${EXITCODE:-0} + 1 ))
}
# Script name, used in the usage message.
PROGRAM=$(basename "$0")
# All five options are mandatory: 5 flags + 5 values = 10 arguments.
if [[ $# -lt 10 ]];then
usage
exit 1
fi
# Parse the command-line options.
while getopts 'n:s:i:d:t:' OPT; do
case $OPT in
n)
# source database name
RDBName=${OPTARG}
;;
s)
# source (SqlServer) table name
s_tableName=${OPTARG}
;;
i)
# target Hive table name
hive_tableName=${OPTARG}
;;
d)
# partition date (yyyy-mm-dd); the day BEFORE it becomes the partition
partitionValue=${OPTARG}
;;
t)
# extract type: only 'a' (full extract) is currently handled
extractType=${OPTARG}
;;
?)
# BUGFIX: exit non-zero on an unknown option (was 'usage_andexit 0',
# which reported success to the caller on a parse error)
usage_andexit 2
esac
done
# JDBC connection string for the source SqlServer instance. Kept inside one
# quoted string: it contains ';', which the shell would treat as a command
# separator if the value were ever expanded unquoted.
con="jdbc:sqlserver://IP:1433;databasename=${RDBName}"
schema_file="${script_path}/${s_tableName}"
# Fetch the source table's columns/types from INFORMATION_SCHEMA. sqoop eval
# prints a bordered ASCII table, so drop the first 5 header lines and the
# trailing border line, keeping only the "| column | type |" data rows.
sqoop eval --connect "${con}" --username "${username}" --password "${password}" \
  --query "Select COLUMN_NAME, DATA_TYPE From INFORMATION_SCHEMA.COLUMNS Where TABLE_NAME = '${s_tableName}'" \
  | sed '1,5d' | sed '$d' > "${schema_file}"
# Strip every space so each row becomes a compact "|column|type|" record.
sed -i 's/ //g' "${schema_file}"
echo "#### 查看SqlServer中的字段和类型 #####"
cat "${schema_file}"
echo "#### 查看SqlServer中的字段和类型 #####"
# Map each SqlServer column type to a Hive type and accumulate the column
# list for the CREATE TABLE statement as "name type ,\n" fragments.
# (Read line-by-line instead of 'for line in `cat`', which word-splits.)
clums=""
while IFS= read -r line; do
    field=$(echo "$line" | awk -F "|" '{print $2}')
    fieldType=$(echo "$line" | awk -F "|" '{print $3}')
    case "$fieldType" in
        datetime|varchar|char|decimal|timestamp|date|bit)
            fieldType="string" ;;
        tinyint|smallint)
            fieldType="int" ;;
        money)
            fieldType="float" ;;
    esac
    clums+="${field} ${fieldType} ,\n"
done < "${schema_file}"
echo "clums==>" "$clums"
echo "------------------------------------------"
# Assemble the Hive DDL: strip the trailing ",\n" from the column list, then
# expand the literal \n escapes into real newlines with 'echo -e'.
dropSql="use stage;"
createHeaderSql="create EXTERNAL table IF NOT EXISTS ${hive_tableName} ("
createMidSql=$(echo -e "${clums%,*}")
createEndSql=") partitioned by (dt string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as textfile"
exeSqlStr="${dropSql} ${createHeaderSql} ${createMidSql} ${createEndSql}"
# Data lands in the partition for the day BEFORE the supplied -d date.
partitionDate=$(date "+%Y-%m-%d" -d "${partitionValue} -1 day")
echo "dropSql==>" "${dropSql}"
echo "createHeaderSql==>" "${createHeaderSql}"
echo "createMidSql==>" "${createMidSql}"
echo "createEndSql==>" "${createEndSql}"
echo "===sql=" "${exeSqlStr}"
echo "partitionDate=====${partitionDate}"
# Create the Hive table (idempotent thanks to IF NOT EXISTS).
hive -e "$exeSqlStr"
# Full extract: import the whole SqlServer table into the Hive partition.
if [ "${extractType}" == "a" ];then
    sqoop import --connect "${con}" --username "${username}" --password "${password}" \
      --table "${s_tableName}" --m 1 --hive-import --fields-terminated-by '\t' \
      --hive-overwrite --hive-table "${hive_tableName}" --hive-database stage \
      --hive-partition-key "dt" --hive-partition-value "${partitionDate}" \
      --delete-target-dir --null-string '\\N' --null-non-string '\\N'
else
    echo "no extractType match excute nothing!!!"
fi
# Clean up the .java classes sqoop codegen leaves in the working directory
# (plain files, so -r is unnecessary; -f tolerates an empty glob match name).
rm -f /home/yh/*.java
2, Commonly used Sqoop commands
MySQL connection
sqoop list-databases --connect jdbc:mysql://IP:3306/DBName --username root --password 123456
SqlServer common commands
#查看所有数据库
sqoop list-databases --connect 'jdbc:sqlserver://IP:1433;databasename=DBName' --username root --password 123456
#查看所有表
sqoop list-tables --connect 'jdbc:sqlserver://IP:1433;' --username root --password 123456
#查看表属性
sqoop eval --connect 'jdbc:sqlserver://IP:1433;databasename=DBName' --username root --password '123456' --query "sp_help ALERTS"
Available commands:
codegen Generate code to interact with database records
create-hive-table Import a table definition into Hive
eval Evaluate a SQL statement and display the results
export Export an HDFS directory to a database table
help List available commands
import Import a table from a database to HDFS
import-all-tables Import tables from a database to HDFS
import-mainframe Import datasets from a mainframe server to HDFS
job Work with saved jobs
list-databases List available databases on a server
list-tables List available tables in a database
merge Merge results of incremental imports
metastore Run a standalone Sqoop metastore
version Display version information
3, Sqoop installed without the SqlServer JDBC driver — download it here:
https://mvnrepository.com/artifact/com.microsoft.sqlserver/mssql-jdbc/6.2.2.jre7