版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
数据:
Input:
hello world hello world dog fish
dog fish
hadoop
spark hello world dog fish
hello world spark hello world dog fish
dog fish spark hello world hadoop dog fish spark hello world
hadoop dog fish hello world dog fish hadoop
hadoop hadoop dog fish
spark dog fish hello world
hello world spark dog fish dog fish
Output:
dog 12
fish 12
hello 10
world 10
hadoop 6
spark 6
代码:
package MapReducer05;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
public class WordCount {

    /**
     * Mapper: tokenizes each input line on whitespace and emits (word, 1).
     * Writable instances are reused across calls to avoid per-record allocation.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Text word = new Text();
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on runs of whitespace: split(" ") would yield empty tokens
            // for consecutive spaces and count "" as a word.
            for (String s : value.toString().split("\\s+")) {
                if (s.isEmpty()) {
                    continue; // leading whitespace produces one empty token
                }
                word.set(s);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Reducer: sums the counts per word, buffers every (word, count) pair in a
     * TreeSet ordered by descending count (ties broken by word, ascending),
     * then emits the sorted result once in cleanup().
     */
    public static class MyReducer extends Reducer<Text, IntWritable, WordWritable, NullWritable> {
        // Ordered by WordWritable.compareTo: count descending, word ascending.
        private final Set<WordWritable> set = new TreeSet<WordWritable>();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable iw : values) {
                count += iw.get();
            }
            set.add(new WordWritable(key.toString(), count));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // All keys have been reduced; emit in sorted order.
            for (WordWritable w : set) {
                context.write(w, NullWritable.get());
            }
        }
    }

    /**
     * Job driver. Input and output paths may be supplied as the first and
     * second command-line arguments; the original hard-coded paths remain the
     * defaults for backward compatibility.
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        //System.setProperty("hadoop.home.dir", "F:\\hadoop-2.6.4");
        Configuration conf = new Configuration();
        BasicConfigurator.configure();
        Job job = Job.getInstance(conf, "mr");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(WordWritable.class);
        job.setOutputValueClass(NullWritable.class);
        String in = args.length > 0 ? args[0] : "C:\\Users\\Chen\\Desktop\\input\\wordcount";
        String out = args.length > 1 ? args[1] : "C:\\Users\\Chen\\Desktop\\20";
        FileInputFormat.addInputPath(job, new Path(in));
        FileOutputFormat.setOutputPath(job, new Path(out));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
class WordWritable implements WritableComparable<WordWritable>{
private String word;
private int count;
public WordWritable(){}
public WordWritable(String word, int count) {
this.word = word;
this.count = count;
}
public int compareTo(WordWritable o) {
int ff = o.getCount()-this.getCount();
if(ff==0){
return this.getWord().compareTo(o.getWord());
} else{
return ff;
}
}
@Override
public String toString() {
return this.getWord()+" "+this.getCount();
}
public void write(DataOutput out) throws IOException {
out.writeUTF(word);
out.writeInt(count);
}
public void readFields(DataInput in) throws IOException {
this.word = in.readUTF();
this.count = in.readInt();
}
public String getWord() {
return word;
}
public void setWord(String word) {
this.word = word;
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
}
}