1. 输入文件格式
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
2. 输出文件格式
- k=3, 按课程分4个文件,每个文件保存平均成绩前3的人名和平均成绩
- 格式如下
algorithm huangjiaju的成绩:82.29
algorithm liutao的成绩:82.0
algorithm huanglei的成绩:74.43
3. 主方法
/**
 * Configures the top-k grade job, submits it, and terminates the JVM with
 * exit status 0 when the job succeeds or 1 when it fails.
 *
 * @param args command-line arguments; not read by this method
 * @throws IOException            propagated from job setup or submission
 * @throws ClassNotFoundException propagated from {@code waitForCompletion}
 * @throws InterruptedException   propagated from {@code waitForCompletion}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(new Configuration());

    // Class that contains this main method.
    job.setJarByClass(Topk.class);

    // Map side: mapper class and its output key/value types.
    job.setMapperClass(TopMaper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Reduce side: reducer class and the final output key/value types.
    job.setReducerClass(TopReduceer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Four reduce tasks instead of the default of one. No Partitioner is set
    // here, so records are distributed by map output key across as many
    // partitions as there are reduce tasks.
    // NOTE(review): distinct courses are not guaranteed to land in distinct
    // output files with key-hash partitioning; confirm whether a custom
    // Partitioner is needed for one file per course.
    job.setNumReduceTasks(4);

    // Input file and output directory.
    Path inputPath = new Path("d:\\mr\\input\\grade.txt");
    Path outputPath = new Path("d:\\mr\\outtopk");
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Block until the job finishes, printing progress, then report the result.
    boolean succeeded = job.waitForCompletion(true);
    int exitCode;
    if (succeeded) {
        exitCode = 0;
    } else {
        exitCode = 1;
    }
    System.exit(exitCode);
}
4. map
/**
 * Mapper that converts one comma-separated grade record of the form
 * {@code course,name,score1,score2,...} into the pair
 * {@code (course, "name,average")}, where the average is the mean of all
 * score fields formatted with two decimal places.
 *
 * <p>Input is {@code <byte offset, line of text>}; output is {@code <Text, Text>}.
 * Records that do not contain a course, a name and at least one score are skipped.
 */
static class TopMaper extends Mapper<LongWritable,Text,Text,Text> {
    /** Index of the first score field; fields 0 and 1 hold the course and the student name. */
    private static final int FIRST_SCORE_INDEX = 2;

    // Reused for every record instead of allocating new Text objects per call.
    private final Text mkey = new Text();
    private final Text mvalue = new Text();

    // Two decimal places. The symbols are pinned to Locale.ROOT so the decimal
    // separator is always '.', never ',', which is the delimiter the reducer
    // splits the value on. Kept per instance (not static) because
    // DecimalFormat is not synchronized.
    private final DecimalFormat averageFormat = new DecimalFormat(
            "0.00", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));

    /**
     * Emits {@code (course, "name,average")} for one input line.
     *
     * @param key     byte offset of the line within the input; unused
     * @param value   one line of the grade file
     * @param context sink for the output pair
     * @throws NumberFormatException if a score field is not a valid number
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] lines = value.toString().split(",");

        // Validate before touching any element: a usable record needs a
        // course, a name and at least one score (this also avoids dividing by zero).
        if (lines.length <= FIRST_SCORE_INDEX) {
            return;
        }

        double sum = 0;
        for (int i = FIRST_SCORE_INDEX; i < lines.length; i++) {
            sum += Double.parseDouble(lines[i]);
        }

        // Divide by the number of scores only; the parentheses matter, since
        // "sum / lines.length - 2" would divide first and then subtract 2.
        int scoreCount = lines.length - FIRST_SCORE_INDEX;
        double average = sum / scoreCount;

        mkey.set(lines[0]);
        mvalue.set(lines[1] + "," + averageFormat.format(average));
        context.write(mkey, mvalue);
    }
}
5. 实现Comparable接口的比较类MyCom
/**
 * Mutable holder for a student name and that student's average score.
 * Natural ordering is ascending by average score; the name does not take
 * part in the comparison.
 */
static class MyCom implements Comparable<MyCom>{
    /** Student name. */
    private String tname;
    /** Average score. */
    private Double tscore;

    /**
     * Orders two holders by average score, lowest first.
     *
     * @param o the holder to compare against
     * @return a negative value, zero, or a positive value when this score is
     *         less than, equal to, or greater than the other score
     */
    @Override
    public int compareTo(MyCom o) {
        return Double.compare(this.tscore, o.getTscore());
    }

    /** @return the student name */
    public String getTname() {
        return this.tname;
    }

    /** @return the average score */
    public Double getTscore() {
        return this.tscore;
    }

    /** @param tname the student name to store */
    public void setTname(String tname) {
        this.tname = tname;
    }

    /** @param tscore the average score to store */
    public void setTscore(Double tscore) {
        this.tscore = tscore;
    }
}
6. reduce
/**
 * Reducer that receives every {@code "name,average"} value for one course
 * and writes the entries with the highest averages, at most {@link #TOP_K}
 * of them, in descending order of average.
 *
 * <p>Each output record is {@code (course, "<name>的成绩:<average>")}.
 */
static class TopReduceer extends Reducer<Text,Text,Text,Text> {
    /** Maximum number of students written per course. */
    private static final int TOP_K = 3;

    // Reused for every output record instead of allocating a new Text each time.
    private final Text rvalue = new Text();

    /**
     * Ranks the students of one course by average and emits the best ones.
     *
     * @param mkey    the course name
     * @param iter    the {@code "name,average"} values for that course
     * @param context sink for the output pairs
     */
    @Override
    protected void reduce(Text mkey, Iterable<Text> iter, Context context) throws IOException, InterruptedException {
        // Collect every student of this course as a comparable (name, average) holder.
        List<MyCom> slist = new ArrayList<>();
        for (Text it : iter) {
            String[] lines = it.toString().split(",");
            MyCom c = new MyCom();
            c.setTname(lines[0]);                  // student name
            c.setTscore(Double.valueOf(lines[1])); // average score
            slist.add(c);
        }

        // Sort directly into descending order of average score.
        Collections.sort(slist, Collections.reverseOrder());

        // A course may have fewer than TOP_K students; never index past the end.
        int limit = Math.min(TOP_K, slist.size());
        for (int k = 0; k < limit; k++) {
            MyCom s = slist.get(k);
            rvalue.set(s.getTname() + "的成绩:" + s.getTscore());
            context.write(mkey, rvalue);
        }
    }
}