MapReduce序列化即案例演示-02
在案例2的基础上进行操作
案例2中最终的结果是这样的,现在需要按照总流量进行排序,该怎么办呢?
现在就需要以这个文件为输入,再执行一个mapreduce程序,但是排序只能对key进行排序,所以mapper的输出key就得是实体类,输出的value为手机号,到reducer的时候,再转换过来,输出的key为手机号,输出的value为实体类.
实体类编写,实现WritableComparable<FlowBean>,重写自定义排序规则,<FlowBean>表示跟自身比较.
package com.buba.mapreduce.flowsort;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Writable bean carrying up/down/total flow counts.
 *
 * <p>Used as the map-output KEY so that the MapReduce shuffle sorts
 * records by total flow (descending) — see {@link #compareTo(FlowBean)}.
 */
public class FlowBean implements WritableComparable<FlowBean> {
    private long upFlow;   // upstream traffic
    private long downFlow; // downstream traffic
    private long sumFlow;  // total traffic (upFlow + downFlow)

    /** No-arg constructor required by Hadoop for deserialization via reflection. */
    public FlowBean() {
    }

    public FlowBean(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    /** Resets all three fields; lets callers reuse one instance per record. */
    public void set(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    /** Serialization: field order here must match {@link #readFields(DataInput)}. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(upFlow);
        dataOutput.writeLong(downFlow);
        dataOutput.writeLong(sumFlow);
    }

    /** Deserialization: reads fields in the exact order they were written. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.upFlow = dataInput.readLong();
        this.downFlow = dataInput.readLong();
        this.sumFlow = dataInput.readLong();
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    /**
     * Orders beans by total flow, descending.
     *
     * <p>Fix: the original {@code this.sumFlow > o.getSumFlow() ? -1 : 1}
     * never returned 0 and gave inconsistent results for equal sums
     * ({@code a.compareTo(b)} and {@code b.compareTo(a)} both returned 1),
     * violating the {@link Comparable} contract. {@code Long.compare}
     * is overflow-safe and returns 0 on ties.
     */
    @Override
    public int compareTo(FlowBean o) {
        return Long.compare(o.sumFlow, this.sumFlow);
    }
}
mapper类编写
package com.buba.mapreduce.flowsort;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the flow-sort job.
 *
 * <p>Input lines have the shape {@code phone \t upFlow \t downFlow \t sumFlow}
 * (the output of the previous flow-count job). The FlowBean is emitted as the
 * KEY so the shuffle sorts records by total flow; the phone number rides along
 * as the value.
 */
public class FlowSortMapper extends Mapper<LongWritable, Text, FlowBean, Text> {
    // Reused across calls to avoid allocating one pair of objects per record.
    private final FlowBean outKey = new FlowBean();
    private final Text outValue = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the line into phone / upFlow / downFlow columns.
        String[] columns = value.toString().split("\t");

        // Populate the sortable key (total flow is derived inside set()).
        outKey.set(Long.parseLong(columns[1]), Long.parseLong(columns[2]));
        // The phone number becomes the value.
        outValue.set(columns[0]);

        context.write(outKey, outValue);
    }
}
reducer编写
package com.buba.mapreduce.flowsort;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the flow-sort job: swaps key and value back so the final
 * output is {@code phone \t upFlow \t downFlow \t sumFlow}.
 */
public class FlowSortReducer extends Reducer<FlowBean, Text, Text, FlowBean> {
    @Override
    protected void reduce(FlowBean bean, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Fix: all phone numbers whose total flow is equal are grouped under
        // the same key. The original code only wrote the first value
        // (values.iterator().next()), silently dropping the rest. Emit every
        // phone number in the group.
        for (Text phone : values) {
            context.write(phone, bean);
        }
    }
}
driver类编写
package com.buba.mapreduce.flowsort;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the flow-sort job.
 *
 * <p>Usage: {@code FlowSortDriver <inputPath> <outputPath>} — the input is
 * the output directory of the previous flow-count job.
 */
public class FlowSortDriver {
    public static void main(String[] args) throws Exception {
        // Build the job from a fresh configuration.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Locate the jar containing this driver.
        job.setJarByClass(FlowSortDriver.class);

        // Wire up the mapper and reducer implementations.
        job.setMapperClass(FlowSortMapper.class);
        job.setReducerClass(FlowSortReducer.class);

        // Map output: FlowBean key (sorted by total flow), phone as value.
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        // Final output: phone as key, FlowBean as value.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input and output locations come from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit and block until the job finishes; exit 0 on success.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
output就是案例二中执行完的输出结果,把它做为输入
最终输出结果