HBase表数据的转移之使用自定义MapReduce

目标:将fruit表中的一部分数据,通过MR迁入到fruit_mr表中

Step1、构建ReadFruitMapper类,用于读取fruit表中的数据

package com.z.hbase_mr;

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;

import org.apache.hadoop.hbase.CellUtil;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.client.Result;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.TableMapper;

import org.apache.hadoop.hbase.util.Bytes;

public class ReadFruitMapper extends TableMapper<ImmutableBytesWritable, Put> {

       @Override

       protected void map(ImmutableBytesWritable key, Result value, Context context)

       throws IOException, InterruptedException {

              //将fruit的name和color提取出来,相当于将每一行数据读取出来放入到Put对象中。

              Put put = new Put(key.get());

              //遍历添加column行

              for(Cell cell: value.rawCells()){

                     //添加/克隆列族:info

                     if("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))){

                            //添加/克隆列:name

                            if("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))){

                                   //将该列cell加入到put对象中

                                   put.add(cell);

                                   //添加/克隆列:color

                            }else if("color".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))){

                                   //向该列cell加入到put对象中

                                   put.add(cell);

                            }

                     }

              }

              //将从fruit读取到的每行数据写入到context中作为map的输出

              context.write(key, put);

       }

}

Step2、构建WriteFruitMRReducer类,用于将读取到的fruit表中的数据写入到fruit_mr表中

package com.z.hbase_mr;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

import org.apache.hadoop.hbase.mapreduce.TableReducer;

import org.apache.hadoop.io.NullWritable;

public class WriteFruitMRReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

       @Override

       protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)

       throws IOException, InterruptedException {

              //读出来的每一行数据写入到fruit_mr表中

              for(Put put: values){

                     context.write(NullWritable.get(), put);

              }

       }

}

Step3、构建Fruit2FruitMRJob类(继承Configured并实现Tool接口),用于组装并运行Job任务

    //组装Job

       public int run(String[] args) throws Exception {

              //得到Configuration

              Configuration conf = this.getConf();

              //创建Job任务

              Job job = Job.getInstance(conf, this.getClass().getSimpleName());

              job.setJarByClass(Fruit2FruitMRJob.class);

              //配置Job

              Scan scan = new Scan();

              scan.setCacheBlocks(false);

              scan.setCaching(500);

              //设置Mapper,注意导入的是mapreduce包下的,不是mapred包下的,后者是老版本

              TableMapReduceUtil.initTableMapperJob(

              "fruit", //数据源的表名

              scan, //scan扫描控制器

              ReadFruitMapper.class,//设置Mapper类

              ImmutableBytesWritable.class,//设置Mapper输出key类型

              Put.class,//设置Mapper输出value值类型

              job//设置给哪个JOB

              );

              //设置Reducer

              TableMapReduceUtil.initTableReducerJob("fruit_mr", WriteFruitMRReducer.class, job);

              //设置Reduce数量,最少1个

              job.setNumReduceTasks(1);

              boolean isSuccess = job.waitForCompletion(true);

              if(!isSuccess){

                     throw new IOException("Job running with error");

              }

              return isSuccess ? 0 : 1;

       }

Step4、主函数中调用运行该Job任务

/**
 * Program entry point: runs {@code Fruit2FruitMRJob} through {@code ToolRunner}
 * with an HBase configuration and exits the JVM with the returned status.
 *
 * @param args command-line arguments, passed on to {@code ToolRunner}
 * @throws Exception if running the tool fails
 */
public static void main(String[] args) throws Exception {
    System.exit(
            ToolRunner.run(HBaseConfiguration.create(), new Fruit2FruitMRJob(), args));
}

猜你喜欢

转载自www.cnblogs.com/zyanrong/p/10753735.html