MapReduce练习之Top k

1. 输入文件格式

algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85

2. 输出文件格式

  1. k=3, 按课程分4个文件,每个文件保存平均成绩前3的人名和平均成绩
  2. 格式如下
    algorithm huangjiaju的成绩:62.0
    algorithm liutao的成绩:56.57
    algorithm huanglei的成绩:55.89

3. 主方法

/**
 * Configures and submits the Top-K job, then exits with 0 on success and 1 on failure.
 *
 * @param args optional overrides: {@code args[0]} is the input path and {@code args[1]} is the
 *             output path; when omitted, the original hard-coded local paths are used
 * @throws IOException            if the job cannot be created or submitted
 * @throws ClassNotFoundException if a configured job class cannot be loaded
 * @throws InterruptedException   if the thread is interrupted while waiting for the job
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // Allow the paths to be supplied on the command line; keep the old literals as defaults.
    String inputPath = args.length > 0 ? args[0] : "d:\\mr\\input\\grade.txt";
    String outputPath = args.length > 1 ? args[1] : "d:\\mr\\outtopk";

    Configuration cfg = new Configuration();
    Job job = Job.getInstance(cfg);
    // Class that contains the main method; used to locate the job jar.
    job.setJarByClass(Topk.class);

    job.setMapperClass(TopMaper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(TopReduceer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Use 4 reduce tasks (the default is 1), which yields 4 output files.
    job.setNumReduceTasks(4);
    // No custom Partitioner is set, so records are partitioned by the map output key and the
    // number of partitions equals the number of reduce tasks.
    // NOTE(review): Hadoop's default HashPartitioner derives the partition from the key's hash
    // modulo the reduce-task count, so distinct courses are not guaranteed to land in distinct
    // output files; a custom Partitioner is needed if exactly one course per file is required.

    // Input and output locations.
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

4. map

/**
 * Mapper that converts one grade record into a (course, "name,average") pair.
 *
 * <p>Input is the line's byte offset and the line text in the form
 * {@code course,name,score1,score2,...}. The output key is the course; the output value is the
 * student name and the average of all scores formatted with two decimals, joined by a comma.
 * Lines that do not contain a course, a name and at least one score are skipped. A score field
 * that is not a valid number makes {@link Double#parseDouble(String)} throw
 * {@link NumberFormatException}.
 */
static class TopMaper extends Mapper<LongWritable,Text,Text,Text> {
    /** Index of the first score; fields 0 and 1 hold the course and the student name. */
    private static final int FIRST_SCORE_INDEX = 2;

    // Output objects are reused across map() calls instead of being allocated per record.
    private final Text mkey = new Text();
    private final Text mvalue = new Text();

    // Two decimal places. The symbols are pinned to Locale.ROOT so the decimal separator is
    // always '.', never ',' -- the value is comma-joined and later split on ',' again.
    private final DecimalFormat df = new DecimalFormat(
            "0.00", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the current line on commas.
        String[] lines = value.toString().split(",");

        // Filter out illegal records before touching any field: we need course, name and
        // at least one score, otherwise there is nothing to average.
        if (lines.length <= FIRST_SCORE_INDEX) {
            return;
        }

        // Sum every field from index 2 onwards as a double.
        double sum = 0;
        for (int i = FIRST_SCORE_INDEX; i < lines.length; i++) {
            sum += Double.parseDouble(lines[i]);
        }

        // Divide by the number of scores, not by the total number of fields.
        int scoreCount = lines.length - FIRST_SCORE_INDEX;
        double average = sum / scoreCount;

        // Key: course. Value: "name,average".
        mkey.set(lines[0]);
        mvalue.set(lines[1] + "," + df.format(average));
        context.write(mkey, mvalue);
    }
}

5. 实现Comparable接口的比较类MyCom

    /**
     * Holder for one student's name and average score, ordered by average score ascending.
     */
    static class MyCom implements Comparable<MyCom>{
        /** Student name. */
        private String name;
        /** Average score; the only field that takes part in ordering. */
        private Double score;

        /** @return the student name */
        public String getTname() {
            return this.name;
        }

        /** @param tname the student name to store */
        public void setTname(String tname) {
            this.name = tname;
        }

        /** @return the average score */
        public Double getTscore() {
            return this.score;
        }

        /** @param tscore the average score to store */
        public void setTscore(Double tscore) {
            this.score = tscore;
        }

        /**
         * Compares two entries by their average score only.
         *
         * @param o the entry to compare against
         * @return a negative, zero or positive value when this score is lower than, equal to
         *         or higher than the other score
         */
        @Override
        public int compareTo(MyCom o) {
            Double other = o.getTscore();
            return Double.compare(this.score, other);
        }
    }

6. reduce

/**
 * Reducer that emits the highest-scoring students of one course.
 *
 * <p>The key is the course; each incoming value is a {@code "name,average"} string. The values
 * are parsed into {@code MyCom} entries, sorted by average score in descending order, and at
 * most {@link #TOP_K} of them are written as {@code name + "的成绩:" + average}.
 */
static class TopReduceer extends Reducer<Text,Text,Text,Text> {
    /** Maximum number of records emitted per course. */
    private static final int TOP_K = 3;

    // Output object reused across writes instead of being allocated per record.
    private final Text rvalue = new Text();

    @Override
    protected void reduce(Text mkey, Iterable<Text> iter, Context context) throws IOException, InterruptedException {
        // Collect every (name, average) pair of this course.
        List<MyCom> slist = new ArrayList<>();
        for (Text it : iter) {
            // Split "name,average" on the comma.
            String[] lines = it.toString().split(",");
            MyCom c = new MyCom();
            c.setTname(lines[0]);                   // student name
            c.setTscore(Double.valueOf(lines[1]));  // average score
            slist.add(c);
        }

        // Sort directly into descending order of average score.
        Collections.sort(slist, Collections.reverseOrder());

        // A course may have fewer than TOP_K students; never index past the end of the list.
        int limit = Math.min(TOP_K, slist.size());
        for (int k = 0; k < limit; k++) {
            MyCom s = slist.get(k);
            rvalue.set(s.getTname() + "的成绩:" + s.getTscore());
            context.write(mkey, rvalue);
        }
    }
}

猜你喜欢

转载自blog.csdn.net/wxfghy/article/details/80568949