HBase和MapReduce集成
public class CustomJobSubmiter extends Configured implements Tool {
public int run(String[] args) throws Exception {
//创建Job
Configuration config = HBaseConfiguration.create(getConf());
Job job = Job.getInstance(config);
job.setJarByClass(CustomJobSubmiter.class); // class that contains mapper
//设置输入、输出格式
job.setInputFormatClass(TableInputFormat.class);
job.setOutputFormatClass(TableOutputFormat.class);
TableMapReduceUtil.initTableMapperJob(
"zpark:t_user",
new Scan(),
UserMapper.class,
Text.class,
DoubleWritable.class,
job
);
TableMapReduceUtil.initTableReducerJob(
"zpark:t_user_count",
UserReducer.class,
job
);
job.setCombinerClass(UserCombiner.class);
job.waitForCompletion(true);
return 0;
}
public static void main(String[] args) throws Exception {
ToolRunner.run(new CustomJobSubmiter(),args);
}
public static class UserMapper extends TableMapper<Text, CountWritable>{
private Text k=new Text();
private DoubleWritable v=new DoubleWritable();
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
String company= Bytes.toString(value.getValue("cf1".getBytes(),"company".getBytes()));
Double salary= Bytes.toInt(value.getValue("cf1".getBytes(),"salary".getBytes()))*1.0;
k.set(company);
v.set(salary);
context.write(k,new CountWritable(1,salary,salary,salary));
}
}
public static class UserCombiner extends Reducer<Text, CountWritable,Text, CountWritable>{
@Override
protected void reduce(Text key, Iterable<CountWritable> values, Context context) throws IOException, InterruptedException {
int total=0;
double tatalSalary=0.0;
double avgSalary=0.0;
double maxSalary=0.0;
double minSalary=Integer.MAX_VALUE;
for (CountWritable value : values) {
tatalSalary+=value.getTatalSalary();
total+=value.getTotal();
if(minSalary>value.getMinSalary()){
minSalary=value.getMinSalary();
}
if(maxSalary<value.getMaxSalary()){
maxSalary=value.getMaxSalary();
}
}
context.write(key,new CountWritable(total,tatalSalary,maxSalary,minSalary));
}
}
public static class UserReducer extends TableReducer<Text, CountWritable, NullWritable>{
@Override
protected void reduce(Text key, Iterable<CountWritable> values, Context context) throws IOException, InterruptedException {
int total=0;
double tatalSalary=0.0;
double avgSalary=0.0;
double maxSalary=0.0;
double minSalary=Integer.MAX_VALUE;
for (CountWritable value : values) {
tatalSalary+=value.getTatalSalary();
total+=value.getTotal();
if(minSalary>value.getMinSalary()){
minSalary=value.getMinSalary();
}
if(maxSalary<value.getMaxSalary()){
maxSalary=value.getMaxSalary();
}
}
avgSalary=tatalSalary/total;
Put put=new Put(key.getBytes());
put.addColumn("cf1".getBytes(),"taotal".getBytes(),(total+"").getBytes());
put.addColumn("cf1".getBytes(),"tatalSalary".getBytes(),(tatalSalary+"").getBytes());
put.addColumn("cf1".getBytes(),"maxSalary".getBytes(),(maxSalary+"").getBytes());
put.addColumn("cf1".getBytes(),"minSalary".getBytes(),(minSalary+"").getBytes());
put.addColumn("cf1".getBytes(),"avgSalary".getBytes(),(avgSalary+"").getBytes());
context.write(null,put);
}
}
}
/**
 * Serializable record of partial salary statistics: a count, a salary sum, and
 * the maximum and minimum salary.
 *
 * <p>{@link #write(DataOutput)} and {@link #readFields(DataInput)} use the same
 * field order: {@code total}, {@code tatalSalary}, {@code maxSalary},
 * {@code minSalary}.
 */
public class CountWritable implements Writable {
    /** Count component; serialized as an int. */
    int total = 0;
    /** Salary sum component; serialized as a double. */
    double tatalSalary = 0.0;
    /** Maximum salary component; serialized as a double. */
    double maxSalary = 0.0;
    /** Minimum salary component; starts at {@code Integer.MAX_VALUE} until set. */
    double minSalary = Integer.MAX_VALUE;

    /** Creates an instance holding the field defaults. */
    public CountWritable() {
    }

    /**
     * Creates an instance with every component given explicitly.
     *
     * @param total       count component
     * @param tatalSalary salary sum component
     * @param maxSalary   maximum salary component
     * @param minSalary   minimum salary component
     */
    public CountWritable(int total, double tatalSalary, double maxSalary, double minSalary) {
        this.minSalary = minSalary;
        this.maxSalary = maxSalary;
        this.tatalSalary = tatalSalary;
        this.total = total;
    }

    /**
     * Restores all four components from {@code in}, in the order they were written.
     *
     * @param in source to read from
     * @throws IOException if reading fails
     */
    public void readFields(DataInput in) throws IOException {
        this.total = in.readInt();
        this.tatalSalary = in.readDouble();
        this.maxSalary = in.readDouble();
        this.minSalary = in.readDouble();
    }

    /**
     * Serializes all four components to {@code out}.
     *
     * @param out sink to write to
     * @throws IOException if writing fails
     */
    public void write(DataOutput out) throws IOException {
        out.writeInt(this.total);
        out.writeDouble(this.tatalSalary);
        out.writeDouble(this.maxSalary);
        out.writeDouble(this.minSalary);
    }
    //....
}
上一篇:Java API操作HBase(二)
下一篇:HBase架构(四)