使用 Map/Reduce 求 TopK（按每行第三列的数值从大到小取前 K 条记录，本例 K=3）

输入

1,mr1,3234
2,mr2,123
3,mr3,9877
4,mr4,348
5,mr5,12345
6,mr6,6646
7,mr7,98
8,mr8,12345
输出（每行格式：第一列编号<TAB>第三列数值，按数值从大到小排列）

8	12345
5	12345
3	9877

代码:

import java.io.*;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class TopK extends Configured implements Tool{
	/**
	 * Names of the custom counters used by this job.
	 * NOTE(review): LINKSKIP presumably counts skipped input lines - confirm
	 * against the code that increments it.
	 */
	enum Counter { LINKSKIP }
	public static class Mywritable implements WritableComparable<Mywritable>
	{
		private Integer num;
		public Mywritable(Integer num)
		{
			this.num=num;
		}
		public Mywritable() 
		{  
	    }
		public void readFields(DataInput arg0) throws IOException {
			// TODO Auto-generated method stub
			this.num=arg0.readInt();
		}

		public void write(DataOutput arg0) throws IOException {
			// TODO Auto-generated method stub
			arg0.writeInt(num);
		}

		public int compareTo(Mywritable o) {
			// TODO Auto-generated method stub
			int gap=this.num-o.num;
			return gap*(-1);
		}
		@Override  
	    public int hashCode() {  
	        return this.num.hashCode();  
	    } 
		@Override
		public boolean equals(Object obj)
		{
			if(!(obj instanceof Mywritable))
				return false;
			Mywritable obj1=(Mywritable)obj;
			return this.num==obj1.num;
		}
		@Override
		public String toString()
		{
			return num+"";
		}
	}
	public static class map extends Mapper<LongWritable,Text,Mywritable,IntWritable>
	{
		@Override
		public void map(LongWritable key,Text value,Context context)throws IOException,InterruptedException
		{
			String line = value.toString().trim(); ;
			String[] split=line.split(",");
            //context.write(line, new Text(""));
			//N
			//1,mr1,3234 Integer.parseInt
			int pay=Integer.parseInt(split[2]);
			int num=Integer.parseInt(split[0]);
			Mywritable payment=new Mywritable(pay);
			context.write(payment,new IntWritable(num));
		}
	}
	/**
	 * Reducer that emits the first K (value, key) pairs it receives and drops
	 * the rest, writing each as (id, key).
	 *
	 * K is read from the job configuration key {@code topk.k} and defaults to 3.
	 * The count is kept per reducer instance, so the limit applies across all
	 * keys handled by one reduce task.
	 */
	public static class reduce extends Reducer<Mywritable,IntWritable,IntWritable,Mywritable>
	{
		/** Configuration key holding K; when absent, 3 is used. */
		private static final String LIMIT_KEY="topk.k";
		/** Maximum number of records this reducer instance will emit. */
		private int limit=3;
		/** Number of records emitted so far by this reducer instance. */
		private int emitted=0;

		/**
		 * Reads K from the job configuration before any key is processed.
		 */
		@Override
		protected void setup(Context context)throws IOException,InterruptedException
		{
			limit=context.getConfiguration().getInt(LIMIT_KEY,3);
		}

		/**
		 * @param key     the ranked value shared by all {@code values}
		 * @param values  ids of the records carrying that value
		 * @param context task context used to emit output
		 * @throws IOException          if emitting a record fails
		 * @throws InterruptedException if the task is interrupted while emitting
		 */
		@Override
		public void reduce(Mywritable key,Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
		{
			for(IntWritable value:values)
			{
				if(emitted>=limit)
				{
					// Quota reached: no need to walk the remaining values.
					return;
				}
				emitted++;
				context.write(value,key);
			}
		}
	}
	/**
	 * Configures and runs the job: reads text from {@code args[0]}, maps with
	 * {@link map}, reduces with {@link reduce}, and writes text output to
	 * {@code args[1]}.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
	 * @throws Exception if job setup or execution fails
	 */
	public int run(String[] args)throws Exception
	{
		if(args==null||args.length<2)
		{
			System.err.println("Usage: TopK <input path> <output path>");
			return 2;
		}
		Configuration conf=getConf();
		// NOTE(review): the job name looks copied from a de-duplication job; kept unchanged.
		Job job=new Job(conf,"depulication");
		job.setJarByClass(TopK.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(map.class);
		job.setMapOutputKeyClass(Mywritable.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(reduce.class);
		// The reducer limits output per reducer instance, so a single reduce
		// task is required for the result to be a global top K.
		job.setNumReduceTasks(1);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(IntWritable.class);
		job.setOutputValueClass(Mywritable.class);
		return job.waitForCompletion(true)?0:1;
	}
	/**
	 * Command-line entry point: runs this tool through {@link ToolRunner} with a
	 * fresh {@link Configuration} and exits the JVM with the tool's return code.
	 *
	 * @param args command-line arguments forwarded to {@link #run(String[])}
	 * @throws Exception if the tool throws
	 */
	public static void main(String[] args)throws Exception
	{
		System.exit(ToolRunner.run(new Configuration(),new TopK(),args));
	}
}

猜你喜欢

转载自blog.csdn.net/qq_21003657/article/details/58602280
今日推荐