mysql -> hive
第一步:hive建表
-- Hive target table for the sqoop incremental import below.
-- Plain text storage, comma-delimited, matching sqoop's --fields-terminated-by ",".
-- IF NOT EXISTS makes the DDL safe to re-run (the original failed on a second run).
create table if not exists airplane(
ID int,
ID_scard string,
-- NOTE(review): 'staion_from' looks like a typo for 'station_from' (cf. 'station_to');
-- kept as-is because renaming the column would break any existing downstream queries.
staion_from string,
station_to string,
plane_no string,
time_takeoff string
)
row format delimited fields terminated by ',';
第二步:写成一个.sh脚本
#!/bin/bash
# Registers a saved sqoop job that incrementally imports the MySQL table
# policedb.airplane into the Hive warehouse directory, appending only rows
# whose ID is greater than the last recorded value (initially 5000).
# Run the actual import afterwards with:  sqoop job --exec sqoop_job
set -e

echo "Creating sqoop job 'sqoop_job' (incremental import: MySQL airplane -> Hive)"

# Only create the job if it does not already exist: the original script failed
# on a second run, and deleting/recreating the job would reset the incremental
# --last-value state that sqoop keeps in its metastore.
if ! sqoop job --show sqoop_job >/dev/null 2>&1; then
    sqoop job --create sqoop_job -- import \
        --connect jdbc:mysql://node3:3306/policedb \
        --table airplane \
        --username root \
        --password-file /sqoop/mysql.pwd \
        --target-dir /user/hive/warehouse/data_works.db/airplane \
        --num-mappers 1 \
        --fields-terminated-by "," \
        --incremental append \
        --check-column ID \
        --last-value 5000
fi
第三步:执行 sqoop job --exec sqoop_job,运行上面创建的增量导入作业
第四步:用airflow的python脚本进行调度,每3分钟调度一次
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators import EmailOperator
from datetime import datetime, timedelta
# Task-level defaults applied to every task in the DAG below.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,  # each run is independent of the previous run's outcome
    'start_date': datetime(2019, 9, 2, 14, 40, 0),
    'retries': 3,
    # BUG FIX: Airflow expects the key 'retry_delay'; the original 'retryDelay'
    # was silently ignored, so failed tasks retried with the default delay
    # instead of the intended 5 seconds.
    'retry_delay': timedelta(seconds=5),
}
# DAG that re-runs the saved sqoop incremental-import job every 3 minutes.
# NOTE(review): with a 2019 start_date, Airflow's default catchup behavior will
# backfill every missed 3-minute interval on first deploy — confirm that is
# intended, or set catchup=False if the installed Airflow version supports it.
dag = DAG('sqoophive',
          default_args=default_args,
          schedule_interval=timedelta(minutes=3))

sqoophive = BashOperator(
    task_id='sqoophive',
    dag=dag,
    # BUG FIX: the original appended '{{ds_nodash}}' as a bare extra argument
    # after the job name; 'sqoop job --exec' only accepts extra arguments after
    # a '--' separator, so the templated date broke the command. The saved job
    # needs no extra arguments, so it is dropped. 'set -e' makes the task fail
    # when sqoop exits non-zero.
    bash_command='set -e; sqoop job --exec sqoop_job ;',
)