Sqoop Installation and Configuration

  • Download
wget -c http://ftp.riken.jp/net/apache/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
  • Extract
tar -zxvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz -C /usr/java/
  • Configure
    • Set environment variables in /etc/profile
    export SQOOP_HOME=/usr/java/sqoop-1.4.7.bin__hadoop-2.6.0
    export PATH=$SQOOP_HOME/bin:$PATH
    
    • Apply immediately
    source /etc/profile
    
    • Edit sqoop-env.sh (in $SQOOP_HOME/conf)
    cp sqoop-env-template.sh sqoop-env.sh
    vim sqoop-env.sh
    
    Add the following settings (adjust the paths to match your own installs):
    export HADOOP_COMMON_HOME=/usr/java/hadoop-3.0.0
    export HADOOP_MAPRED_HOME=/usr/java/hadoop-3.0.0
    export HBASE_HOME=/usr/java/hbase-1.4.1
    export HIVE_HOME=/usr/java/apache-hive-2.3.2-bin
    
    • Download the MySQL JDBC driver jar into the lib directory
    http://central.maven.org/maven2/mysql/mysql-connector-java/6.0.6/mysql-connector-java-6.0.6.jar
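
    For example (assuming $SQOOP_HOME is set as above), the jar can be fetched straight into Sqoop's lib directory; note that Connector/J 6.x uses the driver class com.mysql.cj.jdbc.Driver:
    wget -c http://central.maven.org/maven2/mysql/mysql-connector-java/6.0.6/mysql-connector-java-6.0.6.jar -P $SQOOP_HOME/lib/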
    
    • Comment out the HCAT_HOME, ACCUMULO_HOME, and ZOOKEEPER_HOME checks in bin/configure-sqoop (the HBASE_HOME check can stay, since HBASE_HOME is set above)
    ## Moved to be a runtime check in sqoop.
    if [ ! -d "${HBASE_HOME}" ]; then
      echo "Warning: $HBASE_HOME does not exist! HBase imports will fail."
      echo 'Please set $HBASE_HOME to the root of your HBase installation.'
    fi
    ## Moved to be a runtime check in sqoop.
    #if [ ! -d "${HCAT_HOME}" ]; then
    #  echo "Warning: $HCAT_HOME does not exist! HCatalog jobs will fail."
    #  echo 'Please set $HCAT_HOME to the root of your HCatalog installation.'
    #fi
    #if [ ! -d "${ACCUMULO_HOME}" ]; then
    #  echo "Warning: $ACCUMULO_HOME does not exist! Accumulo imports will fail."
    #  echo 'Please set $ACCUMULO_HOME to the root of your Accumulo installation.'
    #fi
    #if [ ! -d "${ZOOKEEPER_HOME}" ]; then
    #  echo "Warning: $ZOOKEEPER_HOME does not exist! Accumulo imports will fail."
    #  echo 'Please set $ZOOKEEPER_HOME to the root of your Zookeeper installation.'
    #fi
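
    With the checks commented out, a quick sanity check (not part of the original steps) is to run the version command, which should print the Sqoop version banner without those warnings:
    sqoop version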
    
  • Importing data from MySQL into HDFS
    • Start the Hadoop cluster
    start-all.sh
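
    Optionally, confirm the daemons are up before importing; on a single-node setup, jps should list at least NameNode, DataNode, ResourceManager, and NodeManager:
    jps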
    
    • Create a new table tb_order in MySQL
    CREATE TABLE mytest.`tb_order` (
      `id` INT(11) NOT NULL AUTO_INCREMENT,
      `name` VARCHAR(50) NOT NULL,
      `amount` DECIMAL(10,2) NOT NULL,
      `create_time` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- a zero-date default is rejected under MySQL 5.7+ strict mode
      PRIMARY KEY (`id`)
    ) ENGINE=INNODB DEFAULT CHARSET=utf8;
    
    • Insert one row
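    An illustrative statement (the values are arbitrary; id and create_time fill in automatically):
    INSERT INTO mytest.tb_order (name, amount) VALUES ('test order', 100.00);
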
    • Run the import command
    sqoop import --connect jdbc:mysql://master:3306/mytest --username root --password wujinlei --table tb_order --target-dir /home/wujinlei/work/mytest/tb_order -m 1
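
    A re-run fails if the target directory already exists; Sqoop 1.4.x supports --delete-target-dir to clear it first, e.g. (same connection details assumed):
    sqoop import --connect jdbc:mysql://master:3306/mytest --username root --password wujinlei --table tb_order --target-dir /home/wujinlei/work/mytest/tb_order --delete-target-dir -m 1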
    
    • Inspect the imported files
    hdfs dfs -ls /home/wujinlei/work/mytest/tb_order
    hadoop fs -cat /home/wujinlei/work/mytest/tb_order/part-m-00000
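
    Sqoop's default text output is comma-delimited, one line per row, so with the illustrative insert above part-m-00000 would contain something like 1,test order,100.00,<create_time>.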
    
  • Importing into Hive
    • Create the Hive table (the ',' field delimiter matches Sqoop's default text output, so the imported files parse cleanly)
    create table tb_order(id int,name string,amount decimal(10,2),create_time string) row format delimited fields terminated by ','
    lines terminated by '\n';
    
    • Load the data into Hive (note that LOAD DATA INPATH moves the files out of the source HDFS directory)
    load data inpath '/home/wujinlei/work/mytest/tb_order' overwrite into table tb_order;
    
    • Verify the data
    select * from tb_order;
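
    As an alternative to the HDFS-then-load two-step flow, Sqoop can also write straight into Hive with --hive-import (a sketch reusing the same connection details; --hive-table names the target table):
    sqoop import --connect jdbc:mysql://master:3306/mytest --username root --password wujinlei --table tb_order --hive-import --hive-table tb_order -m 1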
    

Reprinted from my.oschina.net/u/3163032/blog/1629847