MapReduce 输出结果导入 HBase
这里以 WordCount 为例,简单介绍如何用代码实现:MapReduce 计算完成后,将结果数据直接写入 HBase 表中。
Job.java
package com.shsxt.mapre.tohbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class WcJob {
    /**
     * Configures and submits a MapReduce job whose reduce output is written
     * directly into an HBase table via {@link TableMapReduceUtil}.
     * Expects the target table (here "hbase" with family "cf1") to exist.
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Address of the active HDFS NameNode.
        conf.set("fs.defaultFS", "hdfs://node01:8020");
        // YARN ResourceManager host. This property expects a bare hostname;
        // the RPC/webapp ports are derived from it. The original value
        // "node02:8088" mixed in the web-UI port and is not a valid hostname.
        conf.set("yarn.resourcemanager.hostname", "node02");
        // ZooKeeper quorum the HBase client uses to locate the cluster/HMaster.
        conf.set("hbase.zookeeper.quorum", "node01,node02,node03");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WcJob.class);
        job.setJobName("mapReduceToHBase");
        job.setMapperClass(WcMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Input file(s) on HDFS.
        FileInputFormat.addInputPaths(job, "/data/input/wordcount.txt");
        // Route reduce output into the HBase table named "hbase";
        // this also wires up the TableOutputFormat and required jars.
        TableMapReduceUtil.initTableReducerJob("hbase", WcReduce.class, job);
        try {
            System.out.println(job.waitForCompletion(true));
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of silently swallowing it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
Mapper.java
Mapper 与普通的 WordCount map 程序没有区别:按空白切分每行文本,对每个单词输出 (word, 1) 键值对。
package com.shsxt.mapre.tohbase;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused output objects: context.write() serializes the contents
    // immediately, so allocating fresh Writables for every token is
    // unnecessary garbage-collection pressure on large inputs.
    private final Text word = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Tokenizes each input line on whitespace and emits a (token, 1) pair
     * per token. Key is the byte offset of the line (unused here).
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }
}
Reduce.java
package com.shsxt.mapre.tohbase;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WcReduce extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    /**
     * Sums the counts for one word and writes the total into HBase:
     * row key = the word, column cf1:count, value = the sum as a decimal string.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        // Encode explicitly as UTF-8: the no-arg getBytes() uses the platform
        // default charset and can corrupt non-ASCII row keys/values.
        byte[] rowKey = key.toString().getBytes("UTF-8");
        // 设置 word 为 Row Key, 构建 Put 对象
        Put put = new Put(rowKey);
        // 指定插入到哪个列族, 插入的列名和值 (family "cf1", qualifier "count")
        put.add("cf1".getBytes("UTF-8"), "count".getBytes("UTF-8"),
                String.valueOf(sum).getBytes("UTF-8"));
        // Emit the real row key instead of null so anything inspecting the
        // output key (counters, tooling) sees the actual row being written.
        context.write(new ImmutableBytesWritable(rowKey), put);
    }
}
注意:运行代码前,hbase中表一定得存在,列族必须存在
建表语句(也可以用JavaApi实现)
create 'hbase','cf1'
执行结果:任务运行完成后,可在 HBase shell 中执行 scan 'hbase' 查看写入的数据。
具体业务场景下,可按同样思路调整 Mapper/Reducer 的逻辑并做相应的执行与分析,完!