准备数据
2017-10 300
2017-10 100
2017-10 200
2017-11 320
2017-11 200
2017-11 280
2017-12 290
2017-12 270
自定义复杂类型
package com.hadoop.maxmin;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* 自定义Writable类,存储最大值,最小值。
*/
/**
 * Custom {@link Writable} value type holding a (max, min) pair of ints.
 * Mutable and reusable across records, as is conventional for Hadoop
 * value objects.
 */
public class MaxMinWritable implements Writable {

    private int max;
    private int min;

    /** Required no-arg constructor for Hadoop's reflective instantiation. */
    public MaxMinWritable() {}

    /** Serializes the pair; field order (max, then min) must match {@link #readFields}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(max);
        out.writeInt(min);
    }

    /** Deserializes the pair in the same order written by {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        max = in.readInt();
        min = in.readInt();
    }

    public int getMax() {
        return max;
    }

    public void setMax(int max) {
        this.max = max;
    }

    public int getMin() {
        return min;
    }

    public void setMin(int min) {
        this.min = min;
    }

    /** Tab-separated "max<TAB>min", matching the job's text output format. */
    @Override
    public String toString() {
        return String.format("%d\t%d", max, min);
    }
}
编写Mapper类
package com.hadoop.maxmin;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* 自定义Mapper
*/
public class MaxMinMapper extends Mapper<Object, Text, Text, MaxMinWritable> {

    // Reused output objects: Hadoop serializes values on context.write(),
    // so reusing them avoids one allocation per input record.
    private final Text outKey = new Text();
    private final MaxMinWritable maxmin = new MaxMinWritable();

    /**
     * Emits (date, value) for each input line of the form "yyyy-MM value",
     * with the value stored as both max and min so the reducer/combiner can
     * aggregate. Blank, malformed, or non-numeric lines are skipped instead
     * of failing the whole task (the original threw
     * ArrayIndexOutOfBoundsException on blank lines).
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Split on any whitespace run so both space- and tab-separated input work.
        String[] words = value.toString().trim().split("\\s+");
        if (words.length < 2) {
            return; // skip blank or malformed lines
        }
        final int amount;
        try {
            amount = Integer.parseInt(words[1]);
        } catch (NumberFormatException e) {
            return; // skip lines whose value field is not an integer
        }
        outKey.set(words[0]); // words[0] is the date key, e.g. "2017-10"
        maxmin.setMax(amount);
        maxmin.setMin(amount);
        context.write(outKey, maxmin);
    }
}
编写Reducer类
package com.hadoop.maxmin;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
* 自定义Reducer
*/
public class MaxMinReducer extends Reducer<Text, MaxMinWritable, Text, MaxMinWritable> {

    // Reused output value; reset at the start of every reduce() call.
    private final MaxMinWritable result = new MaxMinWritable();

    /**
     * Computes the overall max and min for one key.
     *
     * Bug fix: the original used 0 as an "unset" sentinel, which corrupts
     * results whenever a real value is 0 — e.g. for values {0, 5} it reported
     * min = 5, because the genuine 0 looked identical to "not yet set".
     * Initializing with Integer.MIN_VALUE / MAX_VALUE handles zeros and
     * negative values correctly.
     *
     * Safe to use as a Combiner: max-of-maxes / min-of-mins is associative
     * and commutative.
     */
    @Override
    protected void reduce(Text key, Iterable<MaxMinWritable> values, Context context) throws IOException, InterruptedException {
        int max = Integer.MIN_VALUE;
        int min = Integer.MAX_VALUE;
        for (MaxMinWritable value : values) {
            if (value.getMax() > max) {
                max = value.getMax();
            }
            if (value.getMin() < min) {
                min = value.getMin();
            }
        }
        result.setMax(max);
        result.setMin(min);
        context.write(key, result);
    }
}
编写Driver类
package com.hadoop.maxmin;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MaxMinDriver {

    /**
     * Configures and submits the max/min MapReduce job.
     *
     * @param args optional paths: args[0] = input directory, args[1] = output
     *             directory; defaults to "input" and "output/maxmin2" when
     *             absent, preserving the original hardcoded behavior.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(MaxMinDriver.class);
        job.setMapperClass(MaxMinMapper.class);
        job.setReducerClass(MaxMinReducer.class);
        // Combiner optimization: valid because max/min aggregation is
        // associative and commutative.
        job.setCombinerClass(MaxMinReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MaxMinWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MaxMinWritable.class);
        // Allow overriding paths from the command line; fall back to the
        // original defaults for backward compatibility.
        String inputPath = args.length > 0 ? args[0] : "input";
        String outputPath = args.length > 1 ? args[1] : "output/maxmin2";
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        // Propagate job success/failure as the process exit code; the
        // original always exited 0 even when the job failed.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
输出结果part-r-00000
2017-10 300 100
2017-11 320 200
2017-12 290 270