Importing MapReduce Output into HBase

Here we use WordCount as a simple example to show, in code, how to write the result of a MapReduce computation into HBase.

WcJob.java

package com.shsxt.mapre.tohbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class WcJob {
	
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
        //RPC address of the active HDFS NameNode
		conf.set("fs.defaultFS", "hdfs://node01:8020");
        //hostname of the active YARN ResourceManager (hostname only, no port)
		conf.set("yarn.resourcemanager.hostname", "node02");
        //ZooKeeper quorum, used by the client to locate the HMaster
		conf.set("hbase.zookeeper.quorum", "node01,node02,node03");
		
		Job job = Job.getInstance(conf);
		job.setJarByClass(WcJob.class);
		job.setJobName("mapReduceToHBase");
		
		job.setMapperClass(WcMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		//HDFS path of the input file
		FileInputFormat.addInputPaths(job,"/data/input/wordcount.txt");
		//specify which HBase table the reduce output is written to
		TableMapReduceUtil.initTableReducerJob("hbase", WcReduce.class, job);
		
		try {
			System.out.println(job.waitForCompletion(true));
		} catch (ClassNotFoundException | InterruptedException e) {
			e.printStackTrace();
		}
	}
}

WcMapper.java

This is no different from a regular WordCount mapper: it maps each input line to (word, 1) pairs and writes them out.

package com.shsxt.mapre.tohbase;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
	
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		
		// split the line into words and emit (word, 1) for each token
		String line = value.toString();
		StringTokenizer words = new StringTokenizer(line);
		
		while(words.hasMoreTokens()){
			context.write(new Text(words.nextToken()), new IntWritable(1));
		}
	}
}

WcReduce.java

package com.shsxt.mapre.tohbase;

import java.io.IOException;

import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class WcReduce extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
	
	@Override
	protected void reduce(Text key, Iterable<IntWritable> values,
			Context context)
			throws IOException, InterruptedException {
		
		// sum up all the counts emitted for this word
		int sum = 0;
		for (IntWritable i : values) {
			sum += i.get();
		}
        //use the word as the row key when building the Put object
		Put put = new Put(key.toString().getBytes());
        //specify the target column family, column qualifier and value
		put.addColumn("cf1".getBytes(), "count".getBytes(), (sum + "").getBytes());
		
		//TableOutputFormat ignores the output key, so null is fine; the Put carries the row key
		context.write(null, put);
	}
}

Note: before running the job, the HBase table and its column family must already exist.

Table creation statement (this can also be done through the Java API, see the sketch below):

create 'hbase','cf1'
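
For reference, here is a minimal sketch of creating the same table through the HBase Java client API; the class name CreateWcTable is hypothetical, and it assumes an HBase 1.x-style Connection/Admin client and the same ZooKeeper quorum as in WcJob.

CreateWcTable.java

package com.shsxt.mapre.tohbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateWcTable {
	
	public static void main(String[] args) throws IOException {
		Configuration conf = HBaseConfiguration.create();
		//ZooKeeper quorum, same as in WcJob
		conf.set("hbase.zookeeper.quorum", "node01,node02,node03");
		
		try (Connection connection = ConnectionFactory.createConnection(conf);
			 Admin admin = connection.getAdmin()) {
			TableName tableName = TableName.valueOf("hbase");
			//create the table with column family cf1 only if it does not exist yet
			if (!admin.tableExists(tableName)) {
				HTableDescriptor desc = new HTableDescriptor(tableName);
				desc.addFamily(new HColumnDescriptor("cf1"));
				admin.createTable(desc);
			}
		}
	}
}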

Execution result: (screenshot of the populated table omitted)
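
To check the result yourself, scan the table, either with scan 'hbase' in the HBase shell or with a small client program like the following sketch (the class name ScanWcTable is hypothetical and assumes the same HBase 1.x client API as above).

ScanWcTable.java

package com.shsxt.mapre.tohbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanWcTable {
	
	public static void main(String[] args) throws IOException {
		Configuration conf = HBaseConfiguration.create();
		//ZooKeeper quorum, same as in WcJob
		conf.set("hbase.zookeeper.quorum", "node01,node02,node03");
		
		try (Connection connection = ConnectionFactory.createConnection(conf);
			 Table table = connection.getTable(TableName.valueOf("hbase"));
			 ResultScanner scanner = table.getScanner(new Scan())) {
			for (Result result : scanner) {
				//the row key is the word, cf1:count holds the count written by WcReduce
				String word = Bytes.toString(result.getRow());
				String count = Bytes.toString(
						result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("count")));
				System.out.println(word + " -> " + count);
			}
		}
	}
}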

Adapt the job and the analysis to your concrete business scenario. Done!


Reposted from blog.csdn.net/weixin_43270493/article/details/86532741