Add the dependencies:
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.4.9</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.4.9</version>
</dependency>
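Note that hbase-server is on the list because in HBase 1.x the MapReduce integration classes used below (TableMapper, TableMapReduceUtil, TableOutputFormat) ship in that artifact, not in hbase-client.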
Contents of student.txt:
1,Sam,18
2,Tom,16
3,Jetty,25
4,LiLei,56
Create the HBase table:
create 'student','cf1'
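Before running the job, you can confirm the table and its column family from the HBase shell:
describe 'student'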
1. A MapReduce job that reads data from a file and writes it into an HBase table
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HadoopConnectTest extends Configured implements Tool {
    public static class Mapper1 extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line looks like "1,Sam,18": id, name, age.
            String[] values = value.toString().split(",");
            String id = values[0];
            String name = values[1];
            String age = values[2];
            // Build a Put keyed on the id and add one cell per column.
            Put put = new Put(Bytes.toBytes(id));
            put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("name"), Bytes.toBytes(name));
            put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("age"), Bytes.toBytes(age));
            if (!put.isEmpty()) {
                // The output key carries the name of the target table.
                ImmutableBytesWritable ib = new ImmutableBytesWritable(Bytes.toBytes("student"));
                context.write(ib, put);
            }
        }
    }

    // HDFS URI
    private static final String HDFS = "hdfs://192.168.30.141:9000";
    // Input file path
    private static final String INPATH = HDFS + "/student.txt";

    @Override
    public int run(String[] args) throws Exception {
        // The Configuration object carries the job's settings, including how to reach HBase.
        Configuration conf = HBaseConfiguration.create(getConf());
        conf.set("hbase.zookeeper.quorum", "hadoop1:2181,hadoop2:2181,hadoop3:2181");
        conf.set("hbase.rootdir", "hdfs://hadoop1:9000/hbase");
        Job job = Job.getInstance(conf, "student.txt to HBase import");
        job.setJarByClass(HadoopConnectTest.class);
        job.setMapperClass(Mapper1.class);
        // TableMapReduceUtil is an HBase helper class that wires up the configuration a
        // MapReduce job needs to write to HBase; here it sets "student" as the output table.
        // The reducer class is null because the map output goes to HBase directly.
        TableMapReduceUtil.initTableReducerJob("student", null, job);
        // Map-only job: no reduce phase, so set the number of reduce tasks to 0.
        job.setNumReduceTasks(0);
        FileInputFormat.addInputPath(job, new Path(INPATH));
        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic Hadoop options and invokes run().
        int status = ToolRunner.run(new HadoopConnectTest(), args);
        System.exit(status);
    }
}
Run the jar:
./hadoop jar /usr/local/hbase.jar
Verify:
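Assuming the job completed successfully, a scan from the HBase shell should show the four rows from student.txt:
scan 'student'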
2. A MapReduce job that reads data from an HBase table and writes it to a file
import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

public class HBaseMapper extends TableMapper<Text, Text> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // The scan in the driver is limited to cf1:name, so every cell here holds a name.
        for (Cell cell : value.rawCells()) {
            String row = Bytes.toString(CellUtil.cloneRow(cell));
            String name = Bytes.toString(CellUtil.cloneValue(cell));
            context.write(new Text(row), new Text(name));
        }
    }
}
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class HBaseJob {
    public static final String tableName = "student";
    public static final String outputFilePath = "hdfs://hadoop1:9000/output";
    public static Configuration conf = HBaseConfiguration.create();
    static {
        conf.set("hbase.zookeeper.quorum", "hadoop1:2181,hadoop2:2181,hadoop3:2181");
        conf.set("hbase.rootdir", "hdfs://hadoop1:9000/hbase");
        conf.set("hbase.master", "hadoop1:60000");
    }

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        // Scan only the cf1:name column.
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("name"));
        Job job = Job.getInstance(conf, "hbase to file export");
        job.setJarByClass(HBaseJob.class);
        TableMapReduceUtil.initTableMapperJob(
                tableName,         // source table
                scan,              // controls what the mapper sees
                HBaseMapper.class,
                Text.class,        // mapper output key
                Text.class,        // mapper output value
                job);
        // Map-only job: the mapper's Text/Text pairs go straight to the output files.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
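Package and run this job the same way as the first one, then check the output directory on HDFS (as a map-only job it writes part-m-* files):
hdfs dfs -cat /output/part-*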