Hadoop: running two jobs from a single main method in Eclipse

A worked example: one text file contains student ID + name, another contains student ID + score. MapReduce job 1 joins the two files on student ID, and MapReduce job 2 then sorts the joined records by score. Note that student.txt is space-separated while score.txt is tab-separated, which is why the two mappers below parse their input differently.

Input data

student.txt

2017111111 一凡
2017222222 张三
2017333333 李四
2017444444 王五
2017555555 赵刘

score.txt

2017111111	100
2017222222	99
2017333333	98
2017444444	99
2017555555	97
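
For reference (derived from the code below, not shown in the original post), job 1's join output should contain one tab-separated line per student in the form studentId, name, score, roughly:

2017111111	一凡	100
2017222222	张三	99
2017333333	李四	98
2017444444	王五	99
2017555555	赵刘	97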

Code

MultiMapperJoinReducer2.java (join class + main method)

package mywork03;


import java.io.IOException;

import mywork03.mapreduce.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MultiMapperJoinReducer2 {
	public static int time = 0;

	// Mapper for student.txt: each input line is "studentId name" (space-separated)
	public static class MapA extends Mapper<LongWritable, Text, Text, Text> {
		public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
			String[] str = values.toString().split(" ");
			context.write(new Text(str[0]), new Text("MapA"+str[1]));
		}
	}

	// Mapper for score.txt: KeyValueTextInputFormat splits each line on the first tab,
	// so the key is already the student ID and the value is the score
	public static class MapB extends Mapper<Text, Text, Text, Text> {
		public void map(Text key, Text values, Context context) throws IOException, InterruptedException {
//			String[] str = values.toString().split(" ");
			context.write(key, new Text("MapB"+values));
		}
	}

	// Join reducer: for each student ID, combine the name (tagged "MapA") with the score (tagged "MapB")
	public static class Reduce extends Reducer<Text, Text, Text, Text> {

		public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			StringBuffer data = new StringBuffer();
			StringBuffer data1 = new StringBuffer();
			StringBuffer data2 = new StringBuffer();
			
			for (Text val2 : values) {

				 if(val2.find("MapA")==0){
					 data1.append(val2);
					 
					 
				 }else{
					 data2.append(val2);
					 
				 }

			}
			// strip the 4-character "MapA"/"MapB" tags and join name and score with a tab
			data.append(data1.substring(4)).append("\t").append(data2.substring(4));
			context.write(key, new Text(data.toString()));
		}
	}

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		// Job 1: join the two input files on student ID
		Job job = Job.getInstance(conf);
		job.setJobName("Table Join");
		job.setJarByClass(MultiMapperJoinReducer2.class);
		job.setNumReduceTasks(1);
		
		MultipleInputs.addInputPath(job, new Path("F:/xxx/student.txt"), TextInputFormat.class,MapA.class);
		MultipleInputs.addInputPath(job, new Path("F:/xxx/score.txt"), KeyValueTextInputFormat.class,MapB.class);
		
		job.setReducerClass(Reduce.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
//		System.exit(job.waitForCompletion(true) ? 0 : 1);
		job.waitForCompletion(true);
		// Job 2: sort the joined records by score
		String inputPath="hdfs://localhost:8020/output/";
		String outputPath="hdfs://localhost:8020/output1";
		args=new String[] {inputPath,outputPath};
		Configuration conf2=new Configuration();
		Job job2=Job.getInstance(conf2);
		
// Important: pass this class (the one that contains this main method) here, not the mapreduce class that holds job 2's map and reduce methods, so that the jar containing main can be located
		job2.setJarByClass(MultiMapperJoinReducer2.class);
		
		job2.setOutputKeyClass(Bean.class);
	job2.setOutputValueClass(NullWritable.class);
		
	// mapreduce.Map and mapreduce.Reduce are the sort job's classes from mapreduce.java,
	// not the join Reduce defined above in this class
	job2.setMapperClass(mapreduce.Map.class);
	job2.setReducerClass(mapreduce.Reduce.class);
		
		FileInputFormat.addInputPath(job2, new Path(args[0]));
		FileOutputFormat.setOutputPath(job2, new Path(args[1]));
		
		job2.waitForCompletion(true);
	}
}
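
As a side note, the same "two jobs in one main" chaining can also be written with Hadoop's JobControl / ControlledJob API instead of calling waitForCompletion() on job 1 before building job 2. The snippet below is only a minimal sketch, not part of the original code; it assumes job and job2 are configured exactly as in the main method above, and the two imports would go at the top of the file.

import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

// Sketch: wrap both jobs and declare the dependency between them
ControlledJob cJob1 = new ControlledJob(job.getConfiguration());
cJob1.setJob(job);                          // job 1: the join
ControlledJob cJob2 = new ControlledJob(job2.getConfiguration());
cJob2.setJob(job2);                         // job 2: the sort
cJob2.addDependingJob(cJob1);               // run job 2 only after job 1 succeeds

JobControl control = new JobControl("join-then-sort");
control.addJob(cJob1);
control.addJob(cJob2);

Thread t = new Thread(control);             // JobControl implements Runnable
t.start();
while (!control.allFinished()) {            // poll until both jobs are done
	Thread.sleep(1000);
}
control.stop();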

mapreduce.java (map and reduce classes for the sort job)

package mywork03;	// required: MultiMapperJoinReducer2 imports mywork03.mapreduce.Map

import java.io.IOException;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class mapreduce{
public static class Map extends Mapper<LongWritable, Text, Bean, NullWritable>{
	
	Bean student=new Bean();
	protected void map(LongWritable key ,Text value,Context context) throws IOException,InterruptedException{
		String [] strs=value.toString().split("\t");
		System.out.println(strs.length);
		System.out.println("strs[0]:"+strs[0]+" "+"strs[1]:"+strs[1]+" "+"strs[2]:"+strs[2]);
		student.set(strs);
		context.write(student, NullWritable.get());
			
	}
}
public static class Reduce extends Reducer<Bean, NullWritable, Bean, NullWritable>{
	protected void reduce(Bean key,Iterable<NullWritable> values ,Context context)throws IOException,InterruptedException{
		System.out.println("key:::"+key);
		for (NullWritable value : values) {
			
			context.write(key, NullWritable.get());
		}
	}
}
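// Optional standalone driver: lets the sort job be run on its own, without going through MultiMapperJoinReducer2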
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
	String inputPath="hdfs://localhost:8020/output/";
	String outputPath="hdfs://localhost:8020/output1";
	args=new String[] {inputPath,outputPath};
	Configuration conf=new Configuration();
	Job job=Job.getInstance(conf);
	job.setJarByClass(mapreduce.class);
	
	job.setOutputKeyClass(Bean.class);
	job.setOutputValueClass(NullWritable.class);
	
	job.setMapperClass(Map.class);
	job.setReducerClass(Reduce.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	
	job.waitForCompletion(true);
	}
	
}
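
The actual sorting is done by the shuffle phase, which orders the map output keys with Bean.compareTo(); the reducer only writes the keys back out. One caveat not mentioned in the original post: each reducer sorts only its own partition, so if the sort job ever runs with more than one reduce task, every output file is sorted internally but the files are not globally ordered. A single, globally sorted file can be forced by giving the sort job one reducer, e.g.:

job.setNumReduceTasks(1);	// one reducer => a single, globally sorted output file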

Bean.java (the custom WritableComparable used as the sort key)

package mywork03;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class Bean implements WritableComparable<Bean>{

	String studentId;
	String name;
	int score;
	
	public Bean() {
		super();
	}
	public void set(String[] strs) {
		this.studentId = strs[0];
		this.name = strs[1];
		this.score = Integer.parseInt(strs[2]);
		System.out.println("strs[0]:"+strs[0]+" "+"strs[1]:"+strs[1]+" "+"strs[2]:"+strs[2]);
	}
	
	public String getName() {
		return name;
	}
	public void setName(String name) {
		this.name = name;
	}
	public String getStudentId() {
		return studentId;
	}
	public void setStudentId(String studentId) {
		this.studentId = studentId;
	}
	public int getScore() {
		return score;
	}
	public void setScore(int score) {
		this.score = score;
	}
	@Override
	public void readFields(DataInput in) throws IOException {

		this.studentId=in.readUTF();
		this.name=in.readUTF();
		this.score=in.readInt();
	}
	@Override
	public void write(DataOutput out) throws IOException {

		out.writeUTF(studentId);
		out.writeUTF(name);
		out.writeInt(score);
	}
	@Override
	public int compareTo(Bean o) {

		// ascending by score; the shuffle phase uses this ordering to sort the map output keys
		int thisValue = this.score;
		int thatValue = o.score;

		return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
	}
	// Override toString(); otherwise only the object's address would be printed
	@Override
	public String toString() {
		return "学号:" + this.studentId + " 姓名:" + this.name + " 成绩:" + this.score;
	}
	
}
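
If a descending order (highest score first) is wanted instead, only compareTo() needs to change; a minimal variant:

	@Override
	public int compareTo(Bean o) {
		// reverse the comparison so that larger scores sort first
		return Integer.compare(o.score, this.score);
	}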

Reposted from blog.csdn.net/weixin_41487978/article/details/105591099