上代码
FlowBean
/**
* 封装类 数据的传输
* @author chengguo
* @version 1.0
*/
public class FlowBean implements Writable{
//定义属性
private long upFlow;
private long dfFlow;
private long flowSum;
public FlowBean() {}
//流量累加
public FlowBean(long upFlow, long dfFlow) {
this.upFlow = upFlow;
this.dfFlow = dfFlow;
this.flowSum = upFlow +dfFlow;
}
public long getUpFlow() {
return upFlow;
}
public void setUpFlow(long upFlow) {
this.upFlow = upFlow;
}
public long getDfFlow() {
return dfFlow;
}
public void setDfFlow(long dfFlow) {
this.dfFlow = dfFlow;
}
public long getFlowSum() {
return flowSum;
}
public void setFlowSum(long flowSum) {
this.flowSum = flowSum;
}
//序列化
@Override
public void write(DataOutput out) throws IOException {
out.writeLong(upFlow);
out.writeLong(dfFlow);
out.writeLong(flowSum);
}
//反序列化
@Override
public void readFields(DataInput in) throws IOException {
upFlow = in.readLong();
dfFlow = in.readLong();
flowSum = in.readLong();
}
@Override
public String toString() {
return upFlow + "\t" + dfFlow + "\t" + flowSum;
}
}
PhoneNumPartitioner
public class PhoneNumPartitioner extends Partitioner<Text, FlowBean>{
/**
* 根据手机号前三位进行分区
*/
@Override
public int getPartition(Text key, FlowBean value, int numPartitions) {
//1.获取手机号前三位
String phoneNum = key.toString().substring(0,3);
//2.分区
int partitioner=4;
if("135".equals(phoneNum)) {
return 0;
}else if("137".equals(phoneNum)) {
return 1;
}else if("138".equals(phoneNum)) {
return 2;
}else if("139".equals(phoneNum)) {
return 3;
}
//3.
return partitioner;
}
}
FlowCountMapper
/**
* 原始数据3631279950322 13822544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 www.taobao.com 淘宝网 4 0 264 0 200
* 对文件中的数据 截取手机号 上行流量 下行流量三个字段进行处理
* 13822544101 264 0
* @author chengguo
* @version 1.0
*/
public class FlowCountMapper extends Mapper<LongWritable ,Text , Text, FlowBean>{
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//1.获取数据
String line = value.toString();
//2.切割
String[] fields = line.split("\t");
//3.封装对象 拿到关键字段 数据清洗
String phoneN = fields[1];
long upFlow = Long.parseLong(fields[fields.length-3]);
long dfFlow = Long.parseLong(fields[fields.length-2]);
//4.输出到reducer端13822544101 264 0
context.write(new Text(phoneN) , new FlowBean(upFlow,dfFlow));
}
}
FlowCountReducer
public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean>{
@Override
protected void reduce(Text key, Iterable<FlowBean> values, Context context)
throws IOException, InterruptedException {
//1.相同手机号的流量使用再次汇总
long upFlow_sum=0;
long dfFlow_sum=0;
//2.累加
for(FlowBean f: values) {
upFlow_sum += f.getUpFlow();
dfFlow_sum += f.getDfFlow();
}
FlowBean rs = new FlowBean(upFlow_sum,dfFlow_sum);
//3.输出
context.write(key, rs);
}
}
FlowCountDriver
public class FlowCountDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
//1.获取job信息
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
//2.获取jar包
job.setJarByClass(FlowCountDriver.class);
//3.获取自定义的mapper与reducer类
job.setMapperClass(FlowCountMapper.class);
job.setReducerClass(FlowCountReducer.class);
//4.设置mapper的输出数据类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FlowBean.class);
//5.设置reducer输出的数据类型(最终数据类型)
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FlowBean.class);
//设置自定义的分区类
job.setPartitionerClass(PhoneNumPartitioner.class);
job.setNumReduceTasks(5);
//6.设置输入存在的路径与处理后的结果路径
FileInputFormat.setInputPaths(job, new Path("c:/BIGDATA/Test/flow1020/in"));
FileOutputFormat.setOutputPath(job, new Path("c:/BIGDATA/Test/flow1020/out"));
//7.提交任务
boolean rs = job.waitForCompletion(true);
System.out.println(rs ? 0 : 1);
}
}