Through the relevant Java API of HBase, we can implement the MapReduce process accompanying HBase operations, such as using MapReduce to import data from the local file system into an HBase table. For example, we read some raw data from HBase and then use MapReduce for data analysis.
1 Official HBase-MapReduce
1. View the execution of HBase MapReduce tasks
./bin/hbase mapredcp
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/module/hbase-1.3.1/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/module/hadoop-3.1.3/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
/opt/module/hbase-1.3.1/lib/zookeeper-3.4.6.jar:/opt/module/hbase-1.3.1/lib/guava-12.0.1.jar:/opt/module/hbase-1.3.1/lib/metrics-core-2.2.0.jar:/opt/module/hbase-1.3.1/lib/protobuf-java-2.5.0.jar:/opt/module/hbase-1.3.1/lib/hbase-common-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-protocol-1.3.1.jar:/opt/module/hbase-1.3.1/lib/htrace-core-3.1.0-incubating.jar:/opt/module/hbase-1.3.1/lib/hbase-client-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-hadoop-compat-1.3.1.jar:/opt/module/hbase-1.3.1/lib/netty-all-4.0.23.Final.jar:/opt/module/hbase-1.3.1/lib/hbase-server-1.3.1.jar:/opt/module/hbase-1.3.1/lib/hbase-prefix-tree-1.3.1.jar
2. Import of environment variables
(1) Execute the import of environment variables (temporarily effective, perform the following operations on the command line)
$ export HBASE_HOME=/opt/module/hbase
$ export HADOOP_HOME=/opt/module/hadoop-2.7.2
$ export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase mapredcp`
(2) Permanently effective: configured in /etc/profile
export HBASE_HOME=/opt/module/hbase
export HADOOP_HOME=/opt/module/hadoop-2.7.2
And configure it in hadoop-env.sh: (Note: Configure after the for loop)
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase-1.3.1/lib/*
3. Run the official MapReduce tasks
Case 1: Count how many rows of data there are in the student table (named 'stu' in HBase)
/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar rowcounter stu
Case 2: Using MapReduce to import local data into HBase
1) Create a file in tsv format locally: fruit.tsv (note that the delimiter here is the TAB key)
1001 Apple Red
1002 Pear Yellow
1003 Pineapple Yellow
3) Upload the file to HDFS
hadoop fs -put fruit.tsv /
4) Execute MapReduce into the fruit table of HBase
/opt/module/hadoop-3.1.3/bin/yarn jar lib/hbase-server-1.3.1.jar importtsv -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit hdfs://hadoop101:9000/input_fruit
2) Create the HBase table (note: the table must exist before running the import in step 4)
hbase(main):001:0> create 'fruit','info'
5) Use the scan command to view the imported results
hbase(main):011:0> scan 'fruit'
ROW COLUMN+CELL
1001 column=info:color, timestamp=1642253156646, value=Red
1001 column=info:name, timestamp=1642253156646, value=Apple
1002 column=info:color, timestamp=1642253156646, value=Yellow
1002 column=info:name, timestamp=1642253156646, value=Pear
1003 column=info:color, timestamp=1642253156646, value=Yellow
1003 column=info:name, timestamp=1642253156646, value=Pineapple
3 row(s) in 0.2760 seconds
2 Customize HBase-MapReduce1
Goal: Move part of the data in the fruit table into the fruit_mr table through MR.
Step by step implementation:
1. Build the FruitMapper class, which reads each line of the input data and passes it through unchanged
package com.atguigu.mr;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Identity mapper: forwards each input line (byte offset, line text) to the
 * shuffle phase untouched. All parsing is deferred to {@code FruitReducer}.
 */
public class FruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // Pass-through: emit the record exactly as it was read.
        context.write(offset, line);
    }
}
2. Build the FruitReducer class to write the mapped TSV lines into the target HBase table
package com.atguigu.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer that parses TSV lines ({@code rowKey \t name \t color}) and writes
 * them into an HBase table as {@link Put} mutations.
 *
 * <p>The column family can be supplied dynamically on the command line via
 * {@code -Dcf1=<family>}; it defaults to {@code "info"} when absent.
 */
public class FruitReducer extends TableReducer<LongWritable, Text, NullWritable> {

    /** Column family used when no "cf1" parameter is configured. */
    private static final String DEFAULT_COLUMN_FAMILY = "info";

    // Column family name resolved once per task in setup().
    private String cf1;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration configuration = context.getConfiguration();
        // Fix: the fetched value was previously ignored and "info" was
        // hard-coded in reduce(); now the configured family is actually used.
        cf1 = configuration.get("cf1", DEFAULT_COLUMN_FAMILY);
    }

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // 1. Iterate over every line grouped under this key.
        for (Text value : values) {
            // 2. Split the TSV line into rowKey / name / color.
            String[] fields = value.toString().split("\t");
            if (fields.length < 3) {
                // Skip malformed lines instead of failing the whole task with
                // an ArrayIndexOutOfBoundsException.
                continue;
            }
            // 3. Build the Put object keyed by the first field.
            Put put = new Put(Bytes.toBytes(fields[0]));
            // 4. Populate the name and color columns.
            put.addColumn(Bytes.toBytes(cf1), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
            put.addColumn(Bytes.toBytes(cf1), Bytes.toBytes("color"), Bytes.toBytes(fields[2]));
            // 5. Emit the mutation; TableOutputFormat takes the row key from the Put.
            context.write(NullWritable.get(), put);
        }
    }
}
3. Build the FruitDriver class (implements Tool) for assembling and running the Job task
package com.atguigu.mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* @author:左泽林
* @date:日期:2022-01-16-时间:8:55
* @message:
*/
public class FruitDriver implements Tool {
//定义一个COnfiguration
private Configuration configuration = null;
public int run(String[] args) throws Exception {
//1.获取Job对象
Job job = Job.getInstance(configuration);
//2. 设置驱动类路径
job.setJarByClass(FruitDriver.class);
//3. 设置mapper&mapper输出的KV类型
job.setMapperClass(FruitMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
//4. 设置Reducer类
TableMapReduceUtil.initTableReducerJob(args[1] , FruitReducer.class , job);
//5. 设置输入输出的参数
FileInputFormat.setInputPaths(job,new Path(args[0]));
//6. 提交任务
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration = configuration;
}
public Configuration getConf() {
return null;
}
public static void main(String[] args) throws Exception {
Configuration configuration = new Configuration();
int run = ToolRunner.run(configuration, new FruitDriver(), args);
System.exit(run);
}
}
4.Call the main function to run the Job task
5.Package and run tasks
6. Upload the jar package to the virtual machine, create the fruit1 table in hbase, and run the created jar package
Create the fruit1 table:
hbase(main):003:0* create 'fruit1','info'
0 row(s) in 1.8160 seconds
=> Hbase::Table - fruit1
Run jar package
yarn jar hbase-1.0-SNAPSHOT.jar com.atguigu.mr.FruitDriver /input/fruit.tsv fruit1
View the results in the fruit1 table:
hbase(main):004:0> scan 'fruit1'
ROW COLUMN+CELL
1001 column=info:color, timestamp=1642298137576, value=Red
1001 column=info:name, timestamp=1642298137576, value=Apple
1002 column=info:color, timestamp=1642298137576, value=Yellow
1002 column=info:name, timestamp=1642298137576, value=Pear
1003 column=info:color, timestamp=1642298137576, value=Yellow
1003 column=info:name, timestamp=1642298137576, value=Pineapple
3 row(s) in 0.4790 seconds
3 Customize HBase-MapReduce2
Goal: write data into an HBase table. (Note: the code below actually reads from the HBase 'fruit' table with a TableMapper and copies the 'name' column into the 'fruit12' table.)
Step by step implementation:
1. Build the Fruit2Mapper class (a TableMapper) to read row data from the HBase fruit table
package com.atguigu.mr2;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Table mapper that scans rows of the source table and keeps only the cells
 * whose qualifier is "name", emitting one {@link Put} per row keyed by the
 * original row key.
 */
public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // One Put per scanned row, reusing the source row key.
        Put rowPut = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            // Copy the cell only when its column qualifier is "name".
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            if ("name".equals(qualifier)) {
                rowPut.add(cell);
            }
        }
        // Emit the (rowKey, Put) pair for the reducer.
        context.write(key, rowPut);
    }
}
2. Build the Fruit2Reducer class to write the mapped Put objects into the target table
package com.atguigu.mr2;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
 * Pass-through reducer: forwards every {@link Put} received for a row key to
 * the output table unchanged.
 */
public class Fruit2Reducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // The destination row key is carried inside each Put, so the grouping
        // key itself is not needed in the output.
        for (Put mutation : values) {
            context.write(NullWritable.get(), mutation);
        }
    }
}
3. Build the Fruit2Driver class to assemble and submit the Job
package com.atguigu.mr2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Fruit2Driver implements Tool {
//定义配置信息
private Configuration configuration = null;
public int run(String[] args) throws Exception {
//1.获取Job对象
Job job = Job.getInstance(configuration);
//2. 设置主类路径
job.setJarByClass(Fruit2Driver.class);
//3.设置Mapper&输出KV类型
TableMapReduceUtil.initTableMapperJob(
"fruit",
new Scan(),
Fruit2Mapper.class,
ImmutableBytesWritable.class,
Put.class,
job
);
//4.设置Reducer&输出的表
TableMapReduceUtil.initTableReducerJob(
"fruit12",
Fruit2Reducer.class,
job
);
//5.提交任务
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
public void setConf(Configuration configuration) {
this.configuration = configuration;
}
public Configuration getConf() {
return configuration;
}
public static void main(String[] args) throws Exception {
Configuration configuration = HBaseConfiguration.create();
ToolRunner.run(configuration, new Fruit2Driver() , args);
}
}