mapred代码示例--自定义分组

package group;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

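/**
* Custom grouping example.
*
* Groups the input records by their first column and outputs the maximum
* second-column value of each group. Each input line holds two integers
* separated by a TAB character (shown here with a space for readability):
* 3 3
* 3 1
* 3 2
* 2 1
* 2 2
* 1 1
* For this sample the job outputs (1, 1), (2, 2) and (3, 3).
* @author Administrator
*
*/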
public class GroupApp {
public static final String INPUT_PATH = "hdfs://hadoop:9000/in/sort";
public static final String OUT_PATH = "hdfs://hadoop:9000/out";

/**
 * Runs the grouping job end to end: removes any previous output directory,
 * configures and submits the job, and, if the job succeeds, prints the first
 * reducer's output file to standard output.
 *
 * @param args command-line arguments; unused, the paths come from
 *             {@code INPUT_PATH} and {@code OUT_PATH}
 * @throws URISyntaxException if {@code OUT_PATH} is not a valid URI
 * @throws IOException if the file system cannot be reached or the job cannot be submitted
 * @throws ClassNotFoundException if a class configured on the job cannot be resolved
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 */
public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    final Configuration conf = new Configuration();
    final FileSystem fileSystem = FileSystem.get(new URI(OUT_PATH), conf);
    final Path outputDir = new Path(OUT_PATH);
    // FileOutputFormat rejects a job whose output directory already exists,
    // so clear the result of any previous run first.
    fileSystem.delete(outputDir, true);

    final Job job = new Job(conf, GroupApp.class.getSimpleName());
    job.setJarByClass(GroupApp.class);

    FileInputFormat.setInputPaths(job, INPUT_PATH);
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(NewK2Writable.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Keys are sorted on (num1, num2) but grouped on num1 only.
    job.setGroupingComparatorClass(MyGroup.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Do not try to read the output of a failed job: the part file would be
    // missing and the real failure would be masked by a FileNotFoundException.
    if (!job.waitForCompletion(true)) {
        System.err.println("Job " + job.getJobName() + " failed; no output to display.");
        System.exit(1);
    }

    final FSDataInputStream in = fileSystem.open(new Path(outputDir, "part-r-00000"));
    try {
        // close=false: copyBytes would otherwise close System.out as well.
        IOUtils.copyBytes(in, System.out, 1024, false);
        System.out.flush();
    } finally {
        IOUtils.closeStream(in);
    }
}

public static class MyMapper extends Mapper<LongWritable, Text, NewK2Writable, NullWritable>{
final NewK2Writable k2 = new NewK2Writable();
final IntWritable v2 = new IntWritable();

protected void map(LongWritable key, Text value, org.apache.hadoop.mapreduce.Mapper<LongWritable,Text,NewK2Writable,NullWritable>.Context context) throws java.io.IOException ,InterruptedException {
final String[] splited = value.toString().split("\t");
k2.set(Integer.parseInt(splited[0]),Integer.parseInt(splited[1]));
context.write(k2, NullWritable.get());
};
}

public static class MyReducer extends Reducer<NewK2Writable, NullWritable,IntWritable, IntWritable>{
final IntWritable k3 = new IntWritable();
final IntWritable v3 = new IntWritable();
protected void reduce(NewK2Writable k2, java.lang.Iterable<NullWritable> value2s, org.apache.hadoop.mapreduce.Reducer<NewK2Writable, NullWritable,IntWritable,IntWritable>.Context context) throws java.io.IOException ,InterruptedException {
int max = Integer.MIN_VALUE;
for (NullWritable v2 : value2s) {
if(k2.num2>max){
max = k2.num2;
}
}
k3.set(k2.num1);
v3.set(max);
context.write(k3,v3);
};
}

/**
 * Grouping comparator: two keys belong to the same reduce group when their
 * first field ({@code num1}) is equal; the second field is ignored.
 * Both overloads order keys by the signed value of {@code num1}, so they
 * always agree with each other.
 */
public static class MyGroup implements RawComparator<NewK2Writable> {
    /**
     * Compares two deserialized keys by their first field only.
     *
     * @return a negative value, zero, or a positive value as {@code o1.num1}
     *         is less than, equal to, or greater than {@code o2.num1}
     */
    @Override
    public int compare(NewK2Writable o1, NewK2Writable o2) {
        return compareInts(o1.num1, o2.num1);
    }

    /**
     * Compares two serialized keys by their first field only.
     * The key is written as two consecutive 4-byte ints, so the first int
     * starting at each offset is the grouping field.
     *
     * @param b1 buffer holding the first key
     * @param s1 offset of the first key in {@code b1}
     * @param l1 length of the first key (unused; only the leading int matters)
     * @param b2 buffer holding the second key
     * @param s2 offset of the second key in {@code b2}
     * @param l2 length of the second key (unused)
     */
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        return compareInts(WritableComparator.readInt(b1, s1), WritableComparator.readInt(b2, s2));
    }

    /** Overflow-safe three-way comparison of two ints. */
    private static int compareInts(int left, int right) {
        if (left < right) {
            return -1;
        }
        return (left == right) ? 0 : 1;
    }
}


/**
 * Composite map-output key made of two ints. Keys sort by {@code num1} and
 * then by {@code num2}; they are serialized as two consecutive 4-byte ints,
 * {@code num1} first.
 */
public static class NewK2Writable implements WritableComparable<NewK2Writable> {
    /** First column of the input record; the grouping field. */
    int num1;
    /** Second column of the input record. */
    int num2;

    /** Writes {@code num1} followed by {@code num2}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(num1);
        out.writeInt(num2);
    }

    /** Replaces both fields so a single instance can be reused across records. */
    public void set(int num1, int num2) {
        this.num1 = num1;
        this.num2 = num2;
    }

    /** Reads the fields in the same order {@link #write(DataOutput)} emits them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.num1 = in.readInt();
        this.num2 = in.readInt();
    }

    /**
     * Orders by {@code num1}, breaking ties with {@code num2}.
     * Uses explicit comparisons rather than subtraction, which overflows when
     * the operands have opposite signs and large magnitudes.
     */
    @Override
    public int compareTo(NewK2Writable o) {
        if (this.num1 != o.num1) {
            return (this.num1 < o.num1) ? -1 : 1;
        }
        if (this.num2 != o.num2) {
            return (this.num2 < o.num2) ? -1 : 1;
        }
        return 0;
    }

    /** Two keys are equal when both fields match; consistent with {@link #compareTo}. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof NewK2Writable)) {
            return false;
        }
        final NewK2Writable other = (NewK2Writable) obj;
        return this.num1 == other.num1 && this.num2 == other.num2;
    }

    /**
     * Hash derived from {@code num1} only. This still satisfies the
     * equals/hashCode contract, and it makes Hadoop's default HashPartitioner
     * route every key with the same {@code num1} to the same reducer, so groups
     * are not split when more than one reducer is configured.
     */
    @Override
    public int hashCode() {
        return num1;
    }

    /** Tab-separated rendering of both fields, for logs and debugging. */
    @Override
    public String toString() {
        return num1 + "\t" + num2;
    }
}
}

猜你喜欢

转载自jsh0401.iteye.com/blog/2111928