Hadoop 对输出的数据进行排序，并按手机号前缀分区
数据
如果还没有数据，可以先打开下面的链接，
https://blog.csdn.net/qq_17623363/article/details/104146939
按照上一个案例的步骤把数据生成出来。
然后，
本案例在上一个案例的基础上，额外增加了一个分区（Partitioner）。
代码
MyPartitioner2.java
package com.zhenghui.writablecomparable2;
import com.zhenghui.writablecomparable.FlowBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Partitions map output by its value ({@link Text}) rather than by its key.
 *
 * <p>The value is read as a phone number and its first three characters select the partition:
 * {@code 136 -> 0}, {@code 137 -> 1}, {@code 138 -> 2}, {@code 139 -> 3}, anything else -> 4.
 * The returned index does not depend on {@code numPartitions}, so the job should be configured
 * with at least five reduce tasks.
 */
public class MyPartitioner2 extends Partitioner<FlowBean, Text> {

    /** Number of leading characters of the phone number that decide the partition. */
    private static final int PREFIX_LENGTH = 3;

    /** Partition used for every phone number that matches none of the known prefixes. */
    private static final int DEFAULT_PARTITION = 4;

    /**
     * Returns the partition number for one map output record.
     *
     * @param flowBean the map output key; not used for partitioning
     * @param text the map output value, expected to hold the phone number
     * @param numPartitions the number of partitions of the job; not consulted here
     * @return 0-3 for the prefixes 136-139, otherwise 4
     */
    @Override
    public int getPartition(FlowBean flowBean, Text text, int numPartitions) {
        String phone = text.toString(); // the value carries the phone number

        // A value shorter than the prefix cannot match any known prefix; route it to the
        // catch-all partition instead of letting substring() throw.
        if (phone.length() < PREFIX_LENGTH) {
            return DEFAULT_PARTITION;
        }

        switch (phone.substring(0, PREFIX_LENGTH)) {
            case "136":
                return 0;
            case "137":
                return 1;
            case "138":
                return 2;
            case "139":
                return 3;
            default:
                return DEFAULT_PARTITION;
        }
    }
}
SortDriver.java
package com.zhenghui.writablecomparable2;
import com.zhenghui.writablecomparable.FlowBean;
import com.zhenghui.writablecomparable.SortMapper;
import com.zhenghui.writablecomparable.SortReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver that configures and submits the sort-and-partition MapReduce job.
 *
 * <p>The job uses {@code SortMapper} and {@code SortReducer}, with {@code FlowBean} as the map
 * output key and {@code Text} as the map output value, and spreads the map output over five
 * reducers through {@code MyPartitioner2}.
 *
 * <p>Usage: {@code SortDriver [inputPath [outputPath]]}. Paths that are not given fall back to
 * the built-in defaults.
 */
public class SortDriver {

    /** Input path used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "E:\\outputa1";

    /** Output path used when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "E:\\output4";

    /** One reducer per partition index returned by MyPartitioner2 (0 through 4). */
    private static final int NUM_REDUCE_TASKS = 5;

    /**
     * Builds the job, waits for it to finish and exits with 0 on success or 1 on failure.
     *
     * @param args optional: {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException if the job cannot be created or submitted
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     * @throws InterruptedException if the thread is interrupted while waiting for the job
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Command-line paths take precedence; without them the original fixed paths are used.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        // NOTE(review): an earlier version had a disabled
        // conf.set("mapreduce.output.key.field.separator", ",") here to change the output
        // key/value separator; confirm the property name before re-enabling it.
        Job job = Job.getInstance(conf);

        job.setJarByClass(SortDriver.class);
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReducer.class);

        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        // Partitioning: the reducer count must cover every index the partitioner can return.
        job.setNumReduceTasks(NUM_REDUCE_TASKS);
        job.setPartitionerClass(MyPartitioner2.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}