MapReduce算法之二次排序

package mapreduce;


import java.net.URI;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.StringTokenizer;


import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;






import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableComparable;




public class SecondrySort{
// HDFS location used to build input_path below.
static String input="hdfs://master:9000/sort/input";
// HDFS location used to build output_path below.
static String output="hdfs://master:9000/output2";
// Path form of the output location.
static Path output_path=new Path(output);

// Path form of the input location.
static Path input_path=new Path(input);

public static class NewKey implements WritableComparable<NewKey> {


    private int first;
    private int second;


    public int getFirst() {
        return first;
    }


    public int getSecond() {
        return second;
    }


    public void set(int first, int second) {
        this.first = first;
        this.second = second;
    }


    @Override
    public void readFields(DataInput in) throws IOException {
        first = in.readInt();
        second = in.readInt();
    }


    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(first);
        out.writeInt(second);
    }


    //对key排序时,调用这个compareTo方法
    @Override
    public int compareTo(NewKey o) {
        if (first != o.first) {
            return first - o.first;
        } else if (second != o.second) {
            return second - o.second;
        } else {
            return 0;
        }
    }


    //新定义的类应该重写下面两个方法 
    @Override
    public int hashCode() {
        return first+"".hashCode() + second+"".hashCode();
    }


    @Override
    public boolean equals(Object first) {
        if (first instanceof NewKey){
            NewKey r = (NewKey) first;
            return r.first == this.first && r.second == this.second;
        }else{
            return false;
        }
    }
}
public static  class GroupingComparator implements RawComparator<NewKey>{


    @Override
    public int compare(NewKey o1, NewKey o2) {
        int l = o1.getFirst();
        int r = o2.getFirst();
        return l == r ? 0 : (l < r ? 1 : 1);
    }


    //一个字节一个字节的比,直到找到一个不相同的字节时比较这个字节的大小作为两个字节流的大小比较结果。
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, b2, s2, Integer.SIZE/8);
    }
}
public static  class MyMapper extends Mapper<LongWritable, Text, NewKey, IntWritable>{


    private NewKey key = new NewKey();
    private IntWritable value = new IntWritable();


    @Override
    protected void map(LongWritable inKey, Text inValue, Context context)
            throws IOException, InterruptedException {
        StringTokenizer tokenizer = new StringTokenizer(inValue.toString());
        int first = 0;
        int second = 0;
        if(tokenizer.hasMoreTokens()){
            first = Integer.parseInt(tokenizer.nextToken());
            if(tokenizer.hasMoreTokens()) second = Integer.parseInt(tokenizer.nextToken());
            key.set(first, second);
            value.set(second);;
            context.write(key, value);
        }
    }
}
public static class  MyReducer extends Reducer<NewKey, IntWritable, Text, IntWritable>{


    private final Text first = new Text();
   


   // @Override
 public void reduce( NewKey newkey,Iterable<IntWritable> values,Context context)
     throws IOException, InterruptedException {
      first.set(Integer.toString(newkey.getFirst()));
     for(IntWritable val: values){
      context.write(first, val);
 }
 }
}
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    //获取JOB对象
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJarByClass(SecondrySort.class);
FileSystem fs = FileSystem.get(URI.create(output), conf);
if(fs.exists(new Path(URI.create(output)))){
fs.delete(output_path);
};
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(NewKey.class);
    job.setMapOutputValueClass(IntWritable.class);


    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);


    job.setGroupingComparatorClass(GroupingComparator.class);


    FileInputFormat.addInputPath(job, input_path);
FileOutputFormat.setOutputPath(job, output_path);


    System.out.println(job.waitForCompletion(true)? 0 : 1);     
}   
}

猜你喜欢

转载自blog.csdn.net/oceansidexue/article/details/79212849
今日推荐