MapReduce排序-实现比较器和序列化代码

要求:

第一列按照字典顺序进行排列
第一列相同的时候, 第二列按照升序进行排列

解决思路:

将 Map 端输出的 <key, value> 中的 key 和 value 组合成一个新的 key（newKey），value 保持不变。
这样 Map 端的输出就变成 <(key, value), value>；对 newKey 排序时先比较 key，如果 key 相同，再按 value 升序比较。

自定义类型和比较器

public class PairWritable implements WritableComparable<PairWritable> {

	// 组合key,第一部分是我们第一列，第二部分是我们第二列
	private String first;
	
	private int second;
	
	public PairWritable() {
	
	}
	
	public PairWritable(String first, int second) {
		this.set(first, second);
	}
	
	/**
	* 方便设置字段
	*/
	public void set(String first, int second) {
		this.first = first;
		this.second = second;
	}
	
	/**
	* 反序列化
	*/
	@Override
	public void readFields(DataInput input) throws IOException {
		this.first = input.readUTF();
		this.second = input.readInt();
	}
	
	/**
	* 序列化
	*/
	@Override
	public void write(DataOutput output) throws IOException {
		output.writeUTF(first);
		output.writeInt(second);
	}
	
	/*
	* 重写比较器
	*/
	public int compareTo(PairWritable o) {
		//每次比较都是调用该方法的对象与传递的参数进行比较，说白了就是第一行与第
		二行比较完了之后的结果与第三行比较，
		//得出来的结果再去与第四行比较，依次类推
		System.out.println(o.toString());
		Step 2. Mapper
		System.out.println(this.toString());
		int comp = this.first.compareTo(o.first);
		if (comp != 0) {
			return comp;
		} else { // 若第一个字段相等，则比较第二个字段
			return Integer.valueOf(this.second).compareTo(
			Integer.valueOf(o.getSecond()));
		}
	}
	
	public int getSecond() {
		return second;
	}
	
	public void setSecond(int second) {
		this.second = second;
	}
	
	public String getFirst() {
		return first;
	}
	
	public void setFirst(String first) {
		this.first = first;
	}
	@Override
	public String toString() {
		return "PairWritable{" +
		"first='" + first + '\'' +
		", second=" + second +
		'}';
	}

}

Mapper

/**
 * Turns each tab-separated input line {@code first \t second} into the pair
 * {@code <(first, second), second>}: the composite key carries both columns
 * and the value repeats the second column.
 */
public class SortMapper extends
Mapper<LongWritable,Text,PairWritable,IntWritable> {

	// Output objects are reused across map() calls instead of reallocated.
	private PairWritable mapOutKey = new PairWritable();

	private IntWritable mapOutValue = new IntWritable();

	/**
	 * Parses one line and emits its composite key and value.
	 *
	 * @param key     input key of the record (used only in error messages)
	 * @param value   the line of text to parse
	 * @param context sink for the emitted pair
	 * @throws IOException           if the line has fewer than two
	 *                               tab-separated columns, or if writing fails
	 * @throws NumberFormatException if the second column is not an integer
	 * @throws InterruptedException  if writing to the context is interrupted
	 */
	@Override
	public void map(LongWritable key, Text value, Context context)
		throws IOException, InterruptedException {
			String lineValue = value.toString();
			// Blank lines (e.g. a trailing newline in the input) carry no record.
			if (lineValue.trim().isEmpty()) {
				return;
			}
			String[] strs = lineValue.split("\t");
			if (strs.length < 2) {
				throw new IOException("Malformed input record at key " + key
					+ ": expected 2 tab-separated columns but got \"" + lineValue + "\"");
			}
			// Parse the second column once; it feeds both the key and the value.
			int second = Integer.parseInt(strs[1].trim());
			// Build the composite key and the value ==> <(key,value),value>
			mapOutKey.set(strs[0], second);
			mapOutValue.set(second);
			context.write(mapOutKey, mapOutValue);
	}

}

Reducer

/**
 * Writes one output record {@code <first, value>} for every value received
 * for a composite key, dropping the second part of the key.
 */
public class SortReducer extends
Reducer<PairWritable,IntWritable,Text,IntWritable> {

	// Reused output key to avoid allocating a Text per record.
	private Text outPutKey = new Text();

	/**
	 * Emits the first field of the key together with each of its values.
	 *
	 * @param key     composite key of the current group
	 * @param values  values grouped under {@code key}
	 * @param context sink for the emitted pairs
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if writing to the context is interrupted
	 */
	@Override
	public void reduce(PairWritable key, Iterable<IntWritable> values,
	Context context) throws IOException, InterruptedException {
		// Keys within one reduce call compare equal, so their first field is
		// identical; set the output key once instead of once per value.
		outPutKey.set(key.getFirst());
		for (IntWritable value : values) {
			context.write(outPutKey, value);
		}
	}

}

Main 入口

public class SecondarySort extends Configured implements Tool {

	@Override
	public int run(String[] args) throws Exception {
		Configuration conf = super.getConf();
		conf.set("mapreduce.framework.name","local");
		Job job = Job.getInstance(conf,
		SecondarySort.class.getSimpleName());
		job.setJarByClass(SecondarySort.class);
		job.setInputFormatClass(TextInputFormat.class);
		TextInputFormat.addInputPath(job,new Path("file:///L:\\大数据离线
		阶段备课教案以及资料文档——by老王\\4、大数据离线第四天\\排序\\input"));
		TextOutputFormat.setOutputPath(job,new Path("file:///L:\\大数据离
		线阶段备课教案以及资料文档——by老王\\4、大数据离线第四天\\排序\\output"));
		job.setMapperClass(SortMapper.class);
		job.setMapOutputKeyClass(PairWritable.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(SortReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		boolean b = job.waitForCompletion(true);
		return b?0:1;
	}
	
	public static void main(String[] args) throws Exception {
		Configuration entries = new Configuration();
		ToolRunner.run(entries,new SecondarySort(),args);
	}
	
}

Leon_Jinhai_Sun

发布了2209 篇原创文章 · 获赞 50 · 访问量 15万+

他的留言板关注

MapReduce排序-实现比较器和序列化代码

猜你喜欢