更新：MapReduce编程之自定义序列化类及自定义排序2

更新版主要是为了解决上一篇文章里面比较不合理的地方

上一篇文章中在Reduce类里面重写了cleanup方法用于进行第二次排序，虽然可以实现要求，但是比较不科学

在本文章中将分成两个MapReduce任务来执行。

补充概念：在Hadoop中，每个MapReduce任务都被初始化为一个job,每个job又可分为两个阶段:map阶段和reduce阶段。这两个阶段分别用两个函数来表示。Map函数接收一个<key,value>形式的输入，然后同样产生一个<key,value>形式的中间输出，Hadoop会负责将所有具有相同中间key值的value集合在一起传递给reduce函数，reduce函数接收一个如<key,(list of values)>形式的输入，然后对这个value集合进行处理，每个reduce产生0或1个输出，reduce的输出也是<key,value>形式。

TradeBean类：

package com.wqs.myWritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Custom Hadoop serializable type holding a name together with its income,
 * pay and profit figures.
 *
 * <p>Instances are serialized field by field in the order name, income, pay,
 * profit; {@link #readFields(DataInput)} reads them back in the same order.
 *
 * <p>The natural ordering sorts by profit descending and breaks ties by
 * income descending. Two beans with the same profit and income compare as
 * equal regardless of their names.
 */
public class TradeBean implements WritableComparable<TradeBean>{
	private String name;
	private int income;
	private int pay;
	private int profit;
	
	/** Creates an empty bean; fields are expected to be filled in later. */
	public TradeBean() {
		super();
	}

	/**
	 * Creates a fully populated bean.
	 *
	 * @param name   the name written as the first serialized field
	 * @param income the income figure
	 * @param pay    the pay figure
	 * @param profit the profit figure used as the primary sort key
	 */
	public TradeBean(String name, int income, int pay, int profit) {
		super();
		this.name = name;
		this.income = income;
		this.pay = pay;
		this.profit = profit;
	}

	/**
	 * Restores all fields from the stream, in the order used by
	 * {@link #write(DataOutput)}.
	 *
	 * @param in the stream to read from
	 * @throws IOException if reading from the stream fails
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		name = in.readUTF();
		income = in.readInt();
		pay = in.readInt();
		profit = in.readInt();
	}

	/**
	 * Writes all fields to the stream: name, income, pay, profit.
	 *
	 * @param out the stream to write to
	 * @throws IOException if writing to the stream fails
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(name);
		out.writeInt(income);
		out.writeInt(pay);
		out.writeInt(profit);
	}

	/**
	 * Orders beans by profit descending, then by income descending.
	 *
	 * <p>The arguments to {@link Integer#compare(int, int)} are deliberately
	 * swapped (other first, this second) to obtain the descending order.
	 *
	 * @param tradeBean the bean to compare against
	 * @return a negative value if this bean sorts first, a positive value if
	 *         it sorts after {@code tradeBean}, or 0 when profit and income
	 *         are both equal
	 */
	@Override
	public int compareTo(TradeBean tradeBean) {
		int byProfit = Integer.compare(tradeBean.getProfit(), this.profit);
		if (byProfit != 0) {
			return byProfit;
		}
		return Integer.compare(tradeBean.getIncome(), this.income);
	}
	
	/** Returns the fields separated by single spaces: name income pay profit. */
	@Override
	public String toString() {
		return name + " " + income + " " + pay + " " + profit;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public int getIncome() {
		return income;
	}

	public void setIncome(int income) {
		this.income = income;
	}

	public int getPay() {
		return pay;
	}

	public void setPay(int pay) {
		this.pay = pay;
	}

	public int getProfit() {
		return profit;
	}

	public void setProfit(int profit) {
		this.profit = profit;
	}

}

Map类：

package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that parses one space-separated input line into a
 * (name, TradeBean) pair.
 *
 * <p>The first token is used both as the output key and as the bean's name;
 * the second and third tokens are parsed as income and pay. Profit is set to
 * 0 here.
 */
public class Map extends Mapper<Object, Text, Text, TradeBean>{
	// Output objects are reused across map() calls instead of being reallocated.
	private TradeBean outValue = new TradeBean();
	private Text outKey = new Text();

	/**
	 * Emits one (name, bean) record for the given input line.
	 *
	 * @param key     the input key (not used)
	 * @param value   one line of text: name, income and pay separated by spaces
	 * @param context the task context that receives the output record
	 */
	@Override
	protected void map(Object key, Text value, Context context) throws IOException, InterruptedException{
		String[] fields = value.toString().split(" ");
		String trader = fields[0];
		outKey.set(trader);
		outValue.setName(trader);
		outValue.setIncome(Integer.parseInt(fields[1]));
		outValue.setPay(Integer.parseInt(fields[2]));
		outValue.setProfit(0);
		context.write(outKey, outValue);
	}
}

Reduce类：

package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that sums income and pay over all beans sharing a key and emits a
 * single summary bean whose profit is total income minus total pay.
 */
public class Reduce extends Reducer<Text, TradeBean, TradeBean, NullWritable>{
	/**
	 * Aggregates the beans of one key into one output record.
	 *
	 * @param k2      the grouping key, used as the name of the summary bean
	 * @param vs2     the beans grouped under {@code k2}
	 * @param context the task context that receives the summary bean
	 */
	@Override
	protected void reduce(Text k2, Iterable<TradeBean> vs2, Context context) 
			throws IOException, InterruptedException {
		int totalIncome = 0;
		int totalPay = 0;
		for (TradeBean record : vs2) {
			totalIncome += record.getIncome();
			totalPay += record.getPay();
		}
		TradeBean summary =
				new TradeBean(k2.toString(), totalIncome, totalPay, totalIncome - totalPay);
		context.write(summary, NullWritable.get());
	}
}

Map2类：

package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that parses a space-separated line of four tokens (name, income,
 * pay, profit) into a TradeBean and emits it as the output key with a
 * NullWritable value.
 */
public class Map2 extends Mapper<Object, Text, TradeBean, NullWritable> {
	// Reused across map() calls instead of allocating a bean per record.
	private TradeBean outKey = new TradeBean();

	/**
	 * Emits one bean-keyed record for the given input line.
	 *
	 * @param key     the input key (not used)
	 * @param value   one line of text: name, income, pay and profit separated by spaces
	 * @param context the task context that receives the output record
	 */
	@Override
	protected void map(Object key, Text value, Context context) throws IOException, InterruptedException{
		String[] fields = value.toString().split(" ");
		outKey.setName(fields[0]);
		outKey.setIncome(Integer.parseInt(fields[1]));
		outKey.setPay(Integer.parseInt(fields[2]));
		outKey.setProfit(Integer.parseInt(fields[3]));
		context.write(outKey, NullWritable.get());
	}
}

Reduce2类：

package com.wqs.myWritableComparable;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that passes every incoming TradeBean key through to the output.
 */
public class Reduce2 extends Reducer<TradeBean, NullWritable, TradeBean, NullWritable> {
	/**
	 * Writes the key once for every value grouped under it.
	 *
	 * <p>Keys whose {@code compareTo} returns 0 (same profit and income, even
	 * with different names) arrive in a single {@code reduce} call. Writing
	 * the key only once would silently drop all but one of those records.
	 * Hadoop updates the key object as the value iterator advances, so
	 * writing {@code k2} inside the loop emits each original record.
	 *
	 * @param k2      the current key; refreshed by the framework during iteration
	 * @param vs2     one NullWritable per record grouped under this key
	 * @param context the task context that receives the output records
	 */
	@Override
	protected void reduce(TradeBean k2, Iterable<NullWritable> vs2, Context context) 
			throws IOException, InterruptedException {
		for (NullWritable value : vs2) {
			context.write(k2, value);
		}
	}
}

Main类：

package com.wqs.myWritableComparable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Driver that runs two MapReduce jobs in sequence: the first (Map/Reduce)
 * reads the input directory and writes to the first output directory, the
 * second (Map2/Reduce2) reads that directory and writes to the second
 * output directory.
 */
public class Main {
	/** HDFS URI that every job path is appended to. */
	private static final String HDFS_URI = "hdfs://192.168.222.128:9000";
	
	/**
	 * Configures both jobs, runs them one after the other and exits with
	 * status 0 only when both succeed.
	 *
	 * @param args command-line arguments; NOTE: they are currently overwritten
	 *             by fixed demo paths before being parsed
	 * @throws Exception if job setup or execution fails
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		System.setProperty("hadoop.home.dir", "E:/hadoop-2.7.7");
		args = new String[] { "/demo03/in/", "/demo03/out", "/demo03/out2" };
		String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		if (otherArgs.length != 3) {
			System.err.println("Usage: Main <in> <out> <out2>");
			System.exit(2);
		}

		// Pass conf so that options parsed by GenericOptionsParser reach the job.
		Job job = Job.getInstance(conf);
		job.setJarByClass(Main.class);
		job.setMapperClass(Map.class);
		job.setReducerClass(Reduce.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(TradeBean.class);
		job.setOutputKeyClass(TradeBean.class);
		job.setOutputValueClass(NullWritable.class);
		FileInputFormat.addInputPath(job, new Path(HDFS_URI + otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(HDFS_URI + otherArgs[1]));

		// The second job consumes the output directory of the first one.
		Job job2 = Job.getInstance(conf);
		job2.setJarByClass(Main.class);
		job2.setMapperClass(Map2.class);
		job2.setReducerClass(Reduce2.class);
		job2.setMapOutputKeyClass(TradeBean.class);
		job2.setMapOutputValueClass(NullWritable.class);
		job2.setOutputKeyClass(TradeBean.class);
		job2.setOutputValueClass(NullWritable.class);
		FileInputFormat.addInputPath(job2, new Path(HDFS_URI + otherArgs[1]));
		FileOutputFormat.setOutputPath(job2, new Path(HDFS_URI + otherArgs[2]));

		// Run job2 only after job has finished; report failure of the first
		// job with a non-zero exit status instead of falling through.
		if (!job.waitForCompletion(true)) {
			System.exit(1);
		}
		System.exit(job2.waitForCompletion(true) ? 0 : 1);
	}
}

更新：MapReduce编程之自定义序列化类及自定义排序2

猜你喜欢