【Hadoop学习之九】MapReduce案例分析一-天气

环境
  虚拟机:VMware 10
  Linux版本:CentOS-6.5-x86_64
  客户端:Xshell4
  FTP:Xftp4
  jdk8
  hadoop-3.1.1

找出每个月气温最高的2天

1949-10-01 14:21:02        34c
1949-10-01 19:21:02        38c
1949-10-02 14:01:02        36c
1950-01-01 11:21:02        32c
1950-10-01 12:21:02        37c
1951-12-01 12:21:02        23c
1950-10-02 12:21:02        41c
1950-10-03 12:21:02        27c
1951-07-01 12:21:02        45c
1951-07-02 12:21:02        46c
1951-07-03 12:21:03        47c
package test.mr.tq;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * @author Administrator
 * 客户端
 */
/**
 * Driver (client) for the "two hottest days per month" MapReduce job.
 *
 * <p>Pipeline: {@link TQMapper} emits a composite key (year/month/day/temperature)
 * with the temperature as value; {@link TqSortComparator} orders keys by date
 * ascending and temperature descending; {@link TqGroupingComparator} groups by
 * year+month; {@link TqReducer} writes the top two distinct days of each group.
 */
public class MyTQ {


    /**
     * Configures and submits the job. Exits with 0 on success, 1 on failure.
     *
     * @param args unused; input/output paths are hard-coded below
     */
    public static void main(String[] args) {
        // Load default Hadoop configuration (core-site.xml etc. from the classpath).
        Configuration conf = new Configuration();

        try {
            // Create the job client.
            Job job = Job.getInstance(conf, "tian qi");
            job.setJarByClass(MyTQ.class);

            // Map phase.
            job.setMapperClass(TQMapper.class);
            // BUGFIX: declare the map-output types explicitly. The original code used
            // setOutputKeyClass(TQ.class)/setOutputValueClass(IntWritable.class), which
            // declares the JOB's final output types; it only worked because the map
            // output types fall back to them, and it wrongly claimed the reducer emits
            // TQ keys when it actually emits Text.
            job.setMapOutputKeyClass(TQ.class);
            job.setMapOutputValueClass(IntWritable.class);
            // Final (reducer) output types: "year-month-day" -> temperature.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Partitioner: spread keys across reducers to balance load.
            job.setPartitionerClass(TqPartitioner.class);
            // Sort comparator for keys in the map-side buffer: we need the hottest
            // readings first within each month.
            job.setSortComparatorClass(TqSortComparator.class);

            // Reduce phase.
            job.setReducerClass(TqReducer.class);
            job.setNumReduceTasks(2);
            // Grouping comparator: keys with the same year and month form one group.
            job.setGroupingComparatorClass(TqGroupingComparator.class);

            // Input / output paths; remove a stale output directory so the job can run.
            Path input = new Path("/root/input");
            FileInputFormat.addInputPath(job, input);
            Path output = new Path("/root/output");
            if (output.getFileSystem(conf).exists(output))
            {
                output.getFileSystem(conf).delete(output, true);
            }
            FileOutputFormat.setOutputPath(job, output);

            // Submit and wait.
            System.exit(job.waitForCompletion(true) ? 0 : 1);

        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}
package test.mr.tq;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Composite map-output key: one temperature reading identified by its date.
 * Serialized field order (year, month, day, wd) is the wire format and must
 * match between {@link #write} and {@link #readFields}.
 */
public class TQ implements WritableComparable<TQ>{

    private int year;   // four-digit year, e.g. 1949
    private int month;  // 1-12
    private int day;    // day of month
    private int wd;     // temperature ("wendu"), degrees Celsius

    public int getYear() {
        return year;
    }
    public void setYear(int year) {
        this.year = year;
    }
    public int getMonth() {
        return month;
    }
    public void setMonth(int month) {
        this.month = month;
    }
    public int getDay() {
        return day;
    }
    public void setDay(int day) {
        this.day = day;
    }
    public int getWd() {
        return wd;
    }
    public void setWd(int wd) {
        this.wd = wd;
    }

    /**
     * Deserializes this key from the stream, in the same order it was written.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        year = in.readInt();
        month = in.readInt();
        day = in.readInt();
        wd = in.readInt();
    }

    /**
     * Serializes this key to the stream.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(month);
        out.writeInt(day);
        out.writeInt(wd);
    }

    /**
     * Natural order: chronological (year, then month, then day).
     * Temperature is deliberately excluded; the job overrides sorting with
     * {@code TqSortComparator} anyway.
     */
    @Override
    public int compareTo(TQ that) {
        int cmp = Integer.compare(year, that.getYear());
        if (cmp != 0) {
            return cmp;
        }
        cmp = Integer.compare(month, that.getMonth());
        if (cmp != 0) {
            return cmp;
        }
        return Integer.compare(day, that.getDay());
    }

}
package test.mr.tq;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Reduce-side grouping comparator: readings from the same year and month are
 * fed to the reducer as a single group, regardless of day or temperature.
 */
public class TqGroupingComparator extends WritableComparator {

    public TqGroupingComparator()
    {
        // true: have the framework instantiate TQ keys for comparison.
        super(TQ.class,true);
    }

    /**
     * Returns 0 (same group) iff both keys share year and month; otherwise
     * orders by year, then month.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;
        int byYear = Integer.compare(left.getYear(), right.getYear());
        return byYear != 0 ? byYear : Integer.compare(left.getMonth(), right.getMonth());
    }
}
package test.mr.tq;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Parses one tab-separated input line ("yyyy-MM-dd HH:mm:ss\t<temp>c") into a
 * composite {@link TQ} key (year/month/day/temperature) and the temperature as
 * value, e.g. "1951-07-03 12:21:03\t47c" -> TQ(1951,7,3,47) : 47.
 */
public class TQMapper extends Mapper<LongWritable, Text, TQ, IntWritable> {

    // Reused output objects (standard Hadoop pattern to avoid per-record allocation).
    TQ tq = new TQ();
    IntWritable vwd = new IntWritable();

    // PERF FIX: hoisted out of map(). The original built a new SimpleDateFormat
    // for every input record. One Mapper instance is driven by a single thread,
    // so reusing the (non-thread-safe) formatter and calendar here is safe.
    private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    private final Calendar cal = Calendar.getInstance();

    @Override
    protected void map(LongWritable key, Text value,
            Context context) throws IOException, InterruptedException
    {
        try
        {
            // e.g. "1951-07-03 12:21:03\t47c"
            String[] strs = StringUtils.split(value.toString(), "\t");
            if (strs == null || strs.length < 2)
            {
                return; // skip malformed lines instead of crashing the task
            }
            Date date = sdf.parse(strs[0]);
            cal.setTime(date);

            // Key: composite (year, month, day, temperature).
            tq.setYear(cal.get(Calendar.YEAR));
            tq.setMonth(cal.get(Calendar.MONTH) + 1); // Calendar.MONTH is 0-based
            tq.setDay(cal.get(Calendar.DAY_OF_MONTH));
            // Strip the trailing unit character ('c') before parsing the temperature.
            int wd = Integer.parseInt(strs[1].substring(0, strs[1].length() - 1));
            tq.setWd(wd);

            // Value: the temperature again, for the reducer's convenience.
            vwd.set(wd);

            context.write(tq, vwd);
        }
        catch (ParseException e)
        {
            // Best effort: report and skip records with an unparseable date.
            e.printStackTrace();
        }

    }

}
package test.mr.tq;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * @author wjy
 * K.V==>K.V.P
 * 分区规则设计  尽量使数据分区均衡  避免倾斜
 */
/**
 * Routes map output to reducers by year (year modulo reducer count), so the
 * per-month data volume is spread roughly evenly and skew is avoided.
 */
public class TqPartitioner extends Partitioner<TQ, IntWritable> {

    @Override
    public int getPartition(TQ key, IntWritable value, int numPartitions) {
        int year = key.getYear();
        return year % numPartitions;
    }

}
package test.mr.tq;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Emits the two hottest days of each (year, month) group as
 * "year-month-day" -&gt; temperature.
 *
 * <p>Within a group the values arrive in temperature-descending order (set by
 * {@code TqSortComparator}), and the deserialized key changes alongside the
 * value iterator. So: the first record is the month's hottest day, and the
 * next record with a DIFFERENT day is the second-hottest distinct day.
 */
public class TqReducer extends Reducer<TQ, IntWritable, Text, IntWritable> {

    Text rkey = new Text();
    IntWritable rval  = new IntWritable();

    @Override
    protected void reduce(TQ key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException
    {
        int emitted = 0;
        int hottestDay = 0;
        for (IntWritable ignored : values) {
            if (emitted == 0) {
                // Hottest reading of the month.
                hottestDay = key.getDay();
                emit(key, context);
                emitted++;
            } else if (key.getDay() != hottestDay) {
                // Second-hottest reading on a different day; we are done.
                emit(key, context);
                break;
            }
            // Same day as the hottest record: skip and keep scanning.
        }
    }

    // Writes the current key as "year-month-day" -> temperature.
    private void emit(TQ key, Context context) throws IOException, InterruptedException {
        rkey.set(key.getYear() + "-" + key.getMonth() + "-" + key.getDay());
        rval.set(key.getWd());
        context.write(rkey, rval);
    }
}
package test.mr.tq;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Map-output sort comparator: date ascending (year, then month), and within
 * the same month temperature DESCENDING, so the hottest readings of a month
 * reach the reducer first.
 */
public class TqSortComparator extends WritableComparator {

    public TqSortComparator()
    {
        // true: have the framework deserialize buffered keys into TQ instances
        // before calling compare().
        super(TQ.class,true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        TQ left = (TQ) a;
        TQ right = (TQ) b;
        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }
        int byMonth = Integer.compare(left.getMonth(), right.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }
        // Negated comparison -> descending temperature.
        return -Integer.compare(left.getWd(), right.getWd());
    }

}

猜你喜欢

转载自www.cnblogs.com/cac2020/p/10308075.html