MR中自定义bean作为key,输出某组排序中最大值。

目录

  • 需求:MR中自定义bean作为key,输出某组排序中最大值。
  • 方案:自定义MR中的 GroupingComparator(分组比较器)

1.需求:MR中自定义bean作为key,输出某组排序中最大值。

   场景:输入数据包含多个订单的商品明细(每行以逗号分隔,第1列为订单id,第3列为金额),求出每个订单中金额最大的那条记录。

2.方案:自定义MR中的 GroupingComparator(分组比较器)

  •       定义排序:OrderBean 类,重写 compareTo 方法:先按订单id升序排列,订单id相同时再按金额降序(desc)排列

public class OrderBean implements WritableComparable<OrderBean>{

 

            private Text itemid;

            private DoubleWritable amount;

 

            public OrderBean() {

            }

 

            public OrderBean(Text itemid, DoubleWritable amount) {

                       set(itemid, amount);

 

            }

 

            public void set(Text itemid, DoubleWritable amount) {

 

                       this.itemid = itemid;

                       this.amount = amount;

 

            }

 

 

 

            public Text getItemid() {

                       return itemid;

            }

 

            public DoubleWritable getAmount() {

                       return amount;

            }

 

            @Override

            public int compareTo(OrderBean o) {

                       int cmp = this.itemid.compareTo(o.getItemid());

                       if (cmp == 0) {

                                   cmp = -this.amount.compareTo(o.getAmount());

                       }

                       return cmp;

            }

 

            @Override

            public void write(DataOutput out) throws IOException {

                       out.writeUTF(itemid.toString());

                       out.writeDouble(amount.get());

                      

            }

 

            @Override

            public void readFields(DataInput in) throws IOException {

                       String readUTF = in.readUTF();

                       double readDouble = in.readDouble();

                      

                       this.itemid = new Text(readUTF);

                       this.amount= new DoubleWritable(readDouble);

            }

 

 

            @Override

            public String toString() {

 

                       return itemid.toString() + "\t" + amount.get();

                      

            }

 

}

     

  •      定义分区:ItemIdPartitioner 类,按照订单id分区,保证订单id相同的bean被发往同一个reduce task

public class ItemIdPartitioner extends Partitioner<OrderBean, NullWritable>{

            @Override

            public int getPartition(OrderBean bean, NullWritable value, int numReduceTasks) {

                       //相同id的订单bean,会发往相同的partition

                       //而且,产生的分区数,是会跟用户设置的reduce task数保持一致

                       return (bean.getItemid().hashCode() & Integer.MAX_VALUE) % numReduceTasks;

            }

}

 

  •      定义分组:ItemidGroupingComparator 类(GroupingComparator),比较时只看订单id,使reduce端把订单id相同的bean视为同一组

public class ItemidGroupingComparator extends WritableComparator {

 

         //传入作为key的bean的class类型,以及制定需要让框架做反射获取实例对象

         protected ItemidGroupingComparator() {

                  super(OrderBean.class, true);

         }

        

 

         @Override

         public int compare(WritableComparable a, WritableComparable b) {

                  OrderBean abean = (OrderBean) a;

                  OrderBean bbean = (OrderBean) b;

                 

                  //比较两个bean时,指定只比较bean中的orderid

                  return abean.getItemid().compareTo(bbean.getItemid());

                 

         }

 

}

      运行类SecondarySort

public class SecondarySort {

           

            static class SecondarySortMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{

                      

                       OrderBean bean = new OrderBean();

                      

                       @Override

                       protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

 

                                   String line = value.toString();

                                   String[] fields = StringUtils.split(line, ",");

                                  

                                   bean.set(new Text(fields[0]), new DoubleWritable(Double.parseDouble(fields[2])));

                                  

                                   context.write(bean, NullWritable.get());

                                  

                       }

                      

            }

           

            static class SecondarySortReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable>{

                      

                      

                       //到达reduce时,相同id的所有bean已经被看成一组,且金额最大的那个一排在第一位

                       @Override

                       protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {

                                   context.write(key, NullWritable.get());

                       }

            }

           

           

            public static void main(String[] args) throws Exception {

                      

                       Configuration conf = new Configuration();

                       Job job = Job.getInstance(conf);

                      

                       job.setJarByClass(SecondarySort.class);

                      

                       job.setMapperClass(SecondarySortMapper.class);

                       job.setReducerClass(SecondarySortReducer.class);

                      

                      

                       job.setOutputKeyClass(OrderBean.class);

                       job.setOutputValueClass(NullWritable.class);

                      

                       FileInputFormat.setInputPaths(job, new Path("hdfs://shizhan01:9000/secondarysort/input"));

                       FileOutputFormat.setOutputPath(job, new Path("hdfs://shizhan01:9000/secondarysort/output3"));

                      

                       //在此设置自定义的Groupingcomparator

                       job.setGroupingComparatorClass(ItemidGroupingComparator.class);

                       //在此设置自定义的partitioner

                       job.setPartitionerClass(ItemIdPartitioner.class);

                      

                       job.setNumReduceTasks(1);

                      

                       job.waitForCompletion(true);

                      

            }

 

}

    预处理数据:

                      

   处理后结果:

                     

猜你喜欢

转载自blog.csdn.net/py_tamir/article/details/84982562