Single Max/Min Values

This example computes, for each month, the overall maximum and minimum value in a single MapReduce job, using a custom Writable that carries both numbers at once.

Preparing the Data

Each input line holds a month and an integer value, separated by a single space:

2017-10 300
2017-10 100
2017-10 200
2017-11 320
2017-11 200
2017-11 280
2017-12 290
2017-12 270

Defining a Custom Composite Type

package com.hadoop.maxmin;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Custom Writable that carries both the maximum and the minimum value.
 */
public class MaxMinWritable implements Writable {
    private int max;
    private int min;
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(max);
        out.writeInt(min);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.max = in.readInt();
        this.min = in.readInt();
    }
    // Hadoop creates Writables via reflection, so a public no-arg constructor is required
    public MaxMinWritable(){}
    public int getMax() {
        return max;
    }

    public void setMax(int max) {
        this.max = max;
    }

    public int getMin() {
        return min;
    }

    public void setMin(int min) {
        this.min = min;
    }

    @Override
    public String toString() {
        return max +"\t"+ min;
    }
}
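
Before wiring the class into a job, it can help to sanity-check the serialization round trip. The following standalone check (a sketch; MaxMinWritableCheck is a hypothetical helper, not part of the original post) writes the fields through write() and reads them back through readFields(), using the same DataOutput/DataInput interfaces Hadoop uses:

package com.hadoop.maxmin;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

/**
 * Round-trip check for MaxMinWritable (hypothetical helper,
 * not part of the original post).
 */
public class MaxMinWritableCheck {
    public static void main(String[] args) throws Exception {
        MaxMinWritable original = new MaxMinWritable();
        original.setMax(320);
        original.setMin(200);

        // Serialize to an in-memory buffer.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh instance and print it.
        MaxMinWritable copy = new MaxMinWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        System.out.println(copy); // expected: 320	200
    }
}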

Writing the Mapper Class

package com.hadoop.maxmin;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Custom Mapper: emits (month, value) with the value stored as both
 * the max and the min of a MaxMinWritable.
 */
public class MaxMinMapper extends Mapper<Object, Text,Text,MaxMinWritable> {
    // A single MaxMinWritable instance, reused across map() calls
    private MaxMinWritable maxmin = new MaxMinWritable();
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        String[] words = value.toString().split(" ");
        String strDate = words[0]; // the month field
        // A single record's value is both its max and its min
        maxmin.setMax(Integer.parseInt(words[1]));
        maxmin.setMin(Integer.parseInt(words[1]));
        // Emit (month, pair); context.write serializes the reused object immediately
        context.write(new Text(strDate),maxmin);
    }
}
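
Reusing the single maxmin instance is safe because context.write serializes it before the next call; the Text key can be reused the same way. If the input may contain blank or malformed lines (an assumption; the sample data is clean), a slightly more defensive variant, sketched below, skips them instead of throwing:

package com.hadoop.maxmin;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Hypothetical defensive variant of MaxMinMapper (not in the original
 * post): reuses the key object as well and skips bad input lines.
 */
public class SafeMaxMinMapper extends Mapper<Object, Text, Text, MaxMinWritable> {
    private final MaxMinWritable maxmin = new MaxMinWritable();
    private final Text outKey = new Text();

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] words = value.toString().trim().split("\\s+");
        if (words.length < 2) {
            return; // skip blank or incomplete lines
        }
        int v;
        try {
            v = Integer.parseInt(words[1]);
        } catch (NumberFormatException e) {
            return; // skip lines whose value is not an integer
        }
        maxmin.setMax(v);
        maxmin.setMin(v);
        outKey.set(words[0]);
        context.write(outKey, maxmin);
    }
}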

Writing the Reducer Class

package com.hadoop.maxmin;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Custom Reducer: folds all of a month's pairs down to one overall
 * maximum and minimum.
 */
public class MaxMinReducer extends Reducer<Text,MaxMinWritable,Text,MaxMinWritable> {
    // Reused MaxMinWritable that accumulates the running result
    private MaxMinWritable result = new MaxMinWritable();
    @Override
    protected void reduce(Text key, Iterable<MaxMinWritable> values, Context context) throws IOException, InterruptedException {
        // Initialize to the widest possible bounds so the first value always
        // replaces them; a sentinel of 0 would fail on data sets that contain
        // zero or negative values.
        result.setMax(Integer.MIN_VALUE);
        result.setMin(Integer.MAX_VALUE);
        for (MaxMinWritable value : values) {
            // Keep the smallest minimum seen so far
            if (value.getMin() < result.getMin()){
                result.setMin(value.getMin());
            }
            // Keep the largest maximum seen so far
            if (value.getMax() > result.getMax()){
                result.setMax(value.getMax());
            }
        }
        context.write(key,result);
    }
}
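
The reduce step is just a fold with larger/smaller comparisons. A quick local check of that fold (a sketch; MaxMinFoldCheck is a hypothetical helper, not part of the original post) makes the expected output easy to verify without a cluster:

package com.hadoop.maxmin;

import java.util.Arrays;
import java.util.List;

/**
 * Hypothetical local check (not in the original post) of the fold the
 * reducer performs over one month's values.
 */
public class MaxMinFoldCheck {
    public static void main(String[] args) {
        List<Integer> october = Arrays.asList(300, 100, 200); // the 2017-10 sample values
        int max = Integer.MIN_VALUE;
        int min = Integer.MAX_VALUE;
        for (int v : october) {
            max = Math.max(max, v);
            min = Math.min(min, v);
        }
        System.out.println("2017-10\t" + max + "\t" + min); // expected: 300, 100
    }
}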

Writing the Driver Class

package com.hadoop.maxmin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxMinDriver {
    public static void main(String[] args) throws Exception{
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(MaxMinDriver.class);
        job.setMapperClass(MaxMinMapper.class);
        job.setReducerClass(MaxMinReducer.class);
        job.setCombinerClass(MaxMinReducer.class); // combiner optimization: max/min are associative, so the reducer can pre-aggregate map output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MaxMinWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MaxMinWritable.class);
        FileInputFormat.setInputPaths(job,new Path("input"));
        FileOutputFormat.setOutputPath(job,new Path("output/maxmin2"));
        job.waitForCompletion(true);
    }
}
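
Two practical notes on the driver: waitForCompletion returns a success flag worth propagating (e.g. System.exit(job.waitForCompletion(true) ? 0 : 1)), and MapReduce refuses to start if output/maxmin2 already exists, so reruns need the directory cleared first. A minimal cleanup sketch (OutputCleaner is a hypothetical helper, not part of the original post):

package com.hadoop.maxmin;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Hypothetical helper (not in the original post): remove a stale
 * output directory so the job can be resubmitted.
 */
public class OutputCleaner {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path output = new Path("output/maxmin2");
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true); // recursive delete
        }
    }
}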

Output file part-r-00000

Each line is the month key followed by the maximum and the minimum, tab-separated by MaxMinWritable.toString():

2017-10 300 100
2017-11 320 200
2017-12 290 270

Reposted from www.cnblogs.com/JZTX123/p/10661380.html