版权声明:数据丁 https://blog.csdn.net/reasery/article/details/82875815
Bean 类(phoneBeanCustomSort,作为 map 输出 key 的自定义可序列化对象)
package mrpro927;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/*
* 自定义的类作为MapReduce传输对象的时候,必须序列化,实现WritableComparable 接口
* 泛型为map输出的key的类型
* 可以把需要的字段都封装过来,map输出的value就可以用NullWritable来代替
*
*/
/**
 * Flow bean used as the MapReduce map-output key.
 *
 * <p>A custom type used as a MapReduce key must implement
 * {@link WritableComparable}: {@code write}/{@code readFields} handle wire
 * serialization between map and reduce, and {@code compareTo} defines the
 * shuffle sort order. All needed fields are packed into the key so the
 * map-output value can simply be {@code NullWritable}.
 */
public class phoneBeanCustomSort implements WritableComparable<phoneBeanCustomSort> {

    private int upflow;   // upstream traffic
    private int downflow; // downstream traffic
    private int sum;      // upflow + downflow, set by the caller
    // Phone number must NOT be int: an 11-digit number exceeds
    // Integer.MAX_VALUE. long (or String) is required.
    private long pnum;

    public int getUpflow() {
        return upflow;
    }

    public void setUpflow(int upflow) {
        this.upflow = upflow;
    }

    public int getDownflow() {
        return downflow;
    }

    public void setDownflow(int downflow) {
        this.downflow = downflow;
    }

    public int getSum() {
        return sum;
    }

    public void setSum(int sum) {
        this.sum = sum;
    }

    public long getPnum() {
        return pnum;
    }

    public void setPnum(long pnum) {
        this.pnum = pnum;
    }

    /** Tab-separated output format, used directly by TextOutputFormat. */
    @Override
    public String toString() {
        return upflow + "\t" + downflow + "\t" + sum + "\t" + pnum;
    }

    /**
     * Serialization: object -> bytes. Invoked when the map output is
     * spilled/shuffled to the reduce side.
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(upflow);
        out.writeInt(downflow);
        out.writeInt(sum);
        out.writeLong(pnum);
    }

    /**
     * Deserialization: bytes -> object, on the reduce side. Field order
     * MUST match {@link #write(DataOutput)} exactly.
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upflow = in.readInt();
        this.downflow = in.readInt();
        this.sum = in.readInt();
        this.pnum = in.readLong();
    }

    /**
     * Sort order for the shuffle: descending by upflow, then descending by
     * downflow as tie-breaker.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction:
     * {@code o.upflow - this.upflow} can overflow int and return a result
     * with the wrong sign, breaking the comparator contract.
     */
    @Override
    public int compareTo(phoneBeanCustomSort o) {
        // Descending by upstream traffic first.
        int cmp = Integer.compare(o.upflow, this.upflow);
        if (cmp == 0) {
            // Equal upflow: fall back to descending downstream traffic.
            cmp = Integer.compare(o.downflow, this.downflow);
        }
        return cmp;
    }
}
Main 类(phoneDataCustomSort,作业驱动:Mapper / Reducer / Partitioner 定义与 Job 提交)
package mrpro927;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
*自定义类的自定义排序,
*先按照上行流量排序,再按照下行流量进行排序
*
*/
/**
 * Driver for the custom-sort flow job: sorts records descending by upstream
 * traffic, then by downstream traffic (order defined by
 * {@code phoneBeanCustomSort.compareTo}).
 */
public class phoneDataCustomSort {

    /**
     * Parses each input line, packs the traffic fields into the bean and
     * emits it as the key (NullWritable value); sorting happens in the
     * shuffle via the bean's compareTo.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, phoneBeanCustomSort, NullWritable> {
        // Reused across map() calls — safe because context.write()
        // serializes the key immediately.
        phoneBeanCustomSort p = new phoneBeanCustomSort();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, phoneBeanCustomSort, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // Split each line on tab characters.
            String[] split = value.toString().split("\t");
            // Keep only well-formed records (exactly 11 columns).
            if (split.length == 11) {
                try {
                    p.setUpflow(Integer.parseInt(split[7]));
                    p.setDownflow(Integer.parseInt(split[8]));
                    p.setSum(p.getUpflow() + p.getDownflow());
                    // Phone number parsed as long — 11 digits overflow int.
                    p.setPnum(Long.parseLong(split[1]));
                    context.write(p, NullWritable.get());
                } catch (NumberFormatException ignored) {
                    // Malformed numeric field: skip this record instead of
                    // failing the whole task (this is a cleaning step).
                }
            }
        }
    }

    /**
     * Identity reducer: called once per key group; writes the key once per
     * grouped value so duplicate records are preserved.
     */
    public static class MyReducer extends Reducer<phoneBeanCustomSort, NullWritable, phoneBeanCustomSort, NullWritable> {
        @Override
        protected void reduce(phoneBeanCustomSort key, Iterable<NullWritable> values,
                Reducer<phoneBeanCustomSort, NullWritable, phoneBeanCustomSort, NullWritable>.Context context)
                throws IOException, InterruptedException {
            for (NullWritable n : values) {
                context.write(key, NullWritable.get());
            }
        }
    }

    /**
     * Routes records to reducers by phone-number prefix (136/137/138/139,
     * everything else to partition 4). Requires 5 reduce tasks.
     */
    public static class MyPartitioner extends Partitioner<phoneBeanCustomSort, NullWritable> {
        @Override
        public int getPartition(phoneBeanCustomSort key, NullWritable value, int numPartitions) {
            String s = String.valueOf(key.getPnum());
            if (s.startsWith("136")) {
                return 0;
            } else if (s.startsWith("137")) {
                return 1;
            } else if (s.startsWith("138")) {
                return 2;
            } else if (s.startsWith("139")) {
                return 3;
            } else {
                return 4;
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Load the Hadoop configuration.
        Configuration conf = new Configuration();
        // Linux user name when submitting from Eclipse.
        System.setProperty("HADOOP_USER_NAME", "mading");
        Job job = Job.getInstance(conf);
        // Main class of this job.
        job.setJarByClass(phoneDataCustomSort.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // Map output key/value types (may be omitted when identical to the
        // reduce output types, kept here for clarity).
        job.setMapOutputKeyClass(phoneBeanCustomSort.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Reduce output key/value types.
        job.setOutputKeyClass(phoneBeanCustomSort.class);
        job.setOutputValueClass(NullWritable.class);
        // Optional prefix-based partitioning (requires 5 reduce tasks).
        //job.setPartitionerClass(MyPartitioner.class);
        //job.setNumReduceTasks(5);
        // Input path on the HA HDFS cluster.
        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/pout01"));
        // Output path — must not already exist.
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/sout01"));
        // Submit and wait; propagate job failure through the exit code
        // instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
ps:phonenum 字段不能用 int:11 位手机号超出了 int 的最大值(2147483647),用 Integer.parseInt 解析会抛出 NumberFormatException,必须改用 long(Long.parseLong)或 String。这个报错当初找了很久才定位到原因。