1 Environment preparation
1) Successfully built hadoop-2.6.0-cdh5.10.0, hbase-1.2.0-cdh5.10.0 development environment
2) Successfully started dfs, yarn, zookeeper, master, regionserver, and tested through HBase Shell
sbin/start-dfs.sh
sbin/start-yarn.sh
bin/hbase-daemon.sh start zookeeper
bin/hbase-daemon.sh start master
bin/hbase-daemon.sh start regionserver
3) Use Eclipse as a development tool and import the following two dependencies
2 Create the project
We use the HBase Java API to practice this project.
3 Create an HDFS directory and upload data to HBase
(1) The content of the data set data.txt is as follows
public static class extends private new
private final static void
key value context context
throws exception
value string line value value string
split
string string new string line Value
while
string value
string word value string
set word value
context write public static class
word extends text text private
new void key value context context
throws int sum for get
output set sum context write key output value
(2) Create directory and upload data
bin/hadoop fs -mkdir -p /user/root/hbase/input/
bin/hadoop fs -put /home/chenbo/data/hadoop/data.txt /user/root/hbase/input/
(3) The code is as follows
import java.nio.file.Path;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.util.Tool;
public class WordCountUpLoadToHBase extends Configured {
public static class WCHBaseMapper extends Mapper<Object, Text, ImmutableBytesWritable, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key,Text value,Context context) throws IOException, InterruptedException{
StringTokenizer strs = new StringTokenizer(value.toString());
while(strs.hasMoreTokens()){
word.set(strs.nextToken());
context.write(new ImmutableBytesWritable(Bytes.toBytes(word.toString())), one);
}
}
}
public static class WCHBaseReducer extends TableReducer<ImmutableBytesWritable, IntWritable, ImmutableBytesWritable>{
public void reduce(ImmutableBytesWritable key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{
int sum = 0;
for(IntWritable val:values){
sum += val.get();
}
Put put = new Put(key.get());
put.add(Bytes.toBytes("content"),Bytes.toBytes("count"),Bytes.toBytes(sum+""));
context.write(key, put);
}
}
@SuppressWarnings("all")
public static void main(String[] args) throws MasterNotRunningException, ZooKeeperConnectionException, IOException, ClassNotFoundException, InterruptedException {
// TODO Auto-generated method stub
String tableName = "wordcount";
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum","192.168.159.130");
conf.set("hbase.zookeeper.property.clientPort","2181");
HBaseAdmin admin = new HBaseAdmin(conf);
if(admin.tableExists(tableName)){
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
HColumnDescriptor columnDescriptor =new HColumnDescriptor("content");
tableDescriptor.addFamily(columnDescriptor);
admin.createTable(tableDescriptor);
Job job = new Job(conf,"upload to hbase");
job.setJarByClass(WordCountUpLoadToHBase.class);
job.setMapperClass(WCHBaseMapper.class);
TableMapReduceUtil.initTableReducerJob(tableName, WCHBaseReducer.class, job,null,null,null,null,false);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Put.class);
FileInputFormat.addInputPaths(job, "hdfs://192.168.159.130:8020/user/root/hbase/input/data.txt");
System.exit(job.waitForCompletion(true)?0:1);
}
}
4 Obtain data from HBase and upload the calculation results to HDFS
4.1 Create MRReadFromHbase.java
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MRReadFromHbase extends Configured {

    /**
     * Mapper: reads one HBase row at a time and emits (rowKey, concatenated
     * cell values) for the 'content' column family of that row.
     */
    public static class WCHBaseMapper extends TableMapper<Text, Text> {

        @Override
        public void map(ImmutableBytesWritable key, Result values, Context context)
                throws IOException, InterruptedException {
            // StringBuilder: local and single-threaded, no need for StringBuffer.
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<byte[], byte[]> entry : values.getFamilyMap("content".getBytes()).entrySet()) {
                byte[] cell = entry.getValue();
                // Fix: guard the raw bytes, not the constructed String —
                // new String(null) would throw NPE, and the String itself
                // can never be null.
                if (cell != null) {
                    sb.append(new String(cell));
                }
            }
            // Fix: write once per row, after all cells have been appended.
            // The original wrote inside the loop, emitting a partial,
            // cumulative value for every cell in the family.
            context.write(new Text(key.get()), new Text(sb.toString()));
        }
    }

    /**
     * Reducer: passes each (rowKey, value) pair through unchanged so the
     * word counts land in the HDFS output as plain text.
     */
    public static class WCHBaseReducer extends Reducer<Text, Text, Text, Text> {

        private Text result = new Text();

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                result.set(val);
                context.write(key, result);
            }
        }
    }

    /**
     * Scans the 'wordcount' HBase table and writes the word counts to an
     * HDFS output directory.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String tableName = "wordcount";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.159.130");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        // Job.getInstance replaces the deprecated new Job(conf, name) ctor.
        Job job = Job.getInstance(conf, "read from hbase to hdfs");
        job.setJarByClass(MRReadFromHbase.class);
        job.setReducerClass(WCHBaseReducer.class);
        // initTableMapperJob installs WCHBaseMapper with a full-table Scan
        // and sets the map output key/value classes to Text/Text.
        TableMapReduceUtil.initTableMapperJob(tableName, new Scan(), WCHBaseMapper.class, Text.class, Text.class, job);
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.159.130:8020/user/root/hbase/output/"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}