1 Environment preparation
1) Successfully built hadoop-2.6.0-cdh5.10.0, hbase-1.2.0-cdh5.10.0 development environment
2) Successfully started dfs, yarn, zookeeper, master, regionserver, and tested through HBase Shell
sbin/start-dfs.sh
sbin/start-yarn.sh
bin/hbase-daemon.sh start zookeeper
bin/hbase-daemon.sh start master
bin/hbase-daemon.sh start regionserver
3) Use Eclipse as a development tool and import the following two dependencies
2 Create the project
We use the HBase Java API to practice this project.
3 Create an HDFS directory and upload data to HBase
(1) The content of the data set data.txt is as follows
public static class extends private new
private final static void
key value context context
throws exception
value string line value value string
split
string string new string line Value
while
string value
string word value string
set word value
context write public static class
word extends text text private
new void key value context context
throws int sum for get
output set sum context write key output value
(2) Create directory and upload data
bin/hadoop fs -mkdir -p /user/root/hbase/input/
bin/hadoop fs -put /home/chenbo/data/hadoop/data.txt /user/root/hbase/input/
(3) The code is as follows
import java.nio.file.Path;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.util.Tool;
public class WordCountUpLoadToHBase extends Configured {
public static class WCHBaseMapper extends Mapper<Object, Text, ImmutableBytesWritable, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key,Text value,Context context) throws IOException, InterruptedException{
StringTokenizer strs = new StringTokenizer(value.toString());
while(strs.hasMoreTokens()){
word.set(strs.nextToken());
context.write(new ImmutableBytesWritable(Bytes.toBytes(word.toString())), one);
}
}
}
public static class WCHBaseReducer extends TableReducer<ImmutableBytesWritable, IntWritable, ImmutableBytesWritable>{
public void reduce(ImmutableBytesWritable key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{
int sum = 0;
for(IntWritable val:values){
sum += val.get();
}
Put put = new Put(key.get());
put.add(Bytes.toBytes("content"),Bytes.toBytes("count"),Bytes.toBytes(sum+""));
context.write(key, put);
}
}
@SuppressWarnings("all")
public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException, ClassNotFoundException, InterruptedException {
// TODO Auto-generated method stub
String tableName = "wordcount";
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum","192.168.159.130");
conf.set("hbase.zookeeper.property.clientPort","2181");
HBaseAdmin admin = new HBaseAdmin(conf);
if(admin.tableExists(tableName)){
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
HColumnDescriptor columnDescriptor =new HColumnDescriptor("content");
tableDescriptor.addFamily(columnDescriptor);
admin.createTable(tableDescriptor);
Job job = new Job(conf,"upload to hbase");
job.setJarByClass(WordCountUpLoadToHBase.class);
job.setMapperClass(WCHBaseMapper.class);
TableMapReduceUtil.initTableReducerJob(tableName, WCHBaseReducer.class, job,null,null,null,null,false);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Put.class);
FileInputFormat.addInputPaths(job, "hdfs://192.168.159.130:8020/user/root/hbase/input/data.txt");
System.exit(job.waitForCompletion(true)?0:1);
}
}
4 Obtain data from HBase and upload the calculation results to HDFS
4.1 Create MRReadFromHbase.java
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MRReadFromHbase extends Configured {

    /**
     * Mapper: reads one HBase row at a time and emits (rowKey, concatenated
     * cell values) for the 'content' column family of that row.
     */
    public static class WCHBaseMapper extends TableMapper<Text, Text> {

        @Override
        public void map(ImmutableBytesWritable key, Result values, Context context)
                throws IOException, InterruptedException {
            // StringBuilder: local and single-threaded, no need for StringBuffer.
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<byte[], byte[]> entry : values.getFamilyMap("content".getBytes()).entrySet()) {
                byte[] cell = entry.getValue();
                // Fix: guard the raw bytes, not the constructed String —
                // new String(null) would throw NPE, and the String itself
                // can never be null.
                if (cell != null) {
                    sb.append(new String(cell));
                }
            }
            // Fix: write once per row, after all cells have been appended.
            // The original wrote inside the loop, emitting a partial,
            // cumulative value for every cell in the family.
            context.write(new Text(key.get()), new Text(sb.toString()));
        }
    }

    /**
     * Reducer: passes each (rowKey, value) pair through unchanged so the
     * word counts land in the HDFS output as plain text.
     */
    public static class WCHBaseReducer extends Reducer<Text, Text, Text, Text> {

        private Text result = new Text();

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                result.set(val);
                context.write(key, result);
            }
        }
    }

    /**
     * Scans the 'wordcount' HBase table and writes the word counts to an
     * HDFS output directory.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String tableName = "wordcount";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.159.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        // Job.getInstance replaces the deprecated new Job(conf, name) ctor.
        Job job = Job.getInstance(conf, "read from hbase to hdfs");
        job.setJarByClass(MRReadFromHbase.class);
        job.setReducerClass(WCHBaseReducer.class);
        // initTableMapperJob installs WCHBaseMapper with a full-table Scan
        // and sets the map output key/value classes to Text/Text.
        TableMapReduceUtil.initTableMapperJob(tableName, new Scan(), WCHBaseMapper.class, Text.class, Text.class, job);
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.159.130:8020/user/root/hbase/output/"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}