Sample data
Mobile phone number URL upstream traffic downstream traffic
13026230503 http://v.baidu.com 20 5000
13826544101 http://www.weibo.com 20 5000
13926435656 http://v.baidu.com/tv 20 5000
13560439658 http://www.edu360.cn 10 4000
13926251106 https://www.jianshu.com/p/bb88f7520b33 70 3000
18211575961 http://weibo.com/?category=1760 10 100
15920133257 https://www.jianshu.com/p/bb88f7520b9e 20 3000
Requirement
Compute the total traffic for each phone number.
Approach: use the phone number as the map output key, and the per-record total traffic (upstream + downstream) as the value.
1 FlowMapper
package com._51doit.flow;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the flow-summary job.
 *
 * <p>Parses one whitespace-separated log line of the form
 * {@code phone url upFlow downFlow} and emits
 * {@code (phone, upFlow + downFlow)}.
 *
 * @author 多易教育-行哥 (Duoyi Education)
 * @since 2020/7/10
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Reused across map() calls — the standard Hadoop writable-reuse idiom;
    // the framework serializes the contents on context.write(), so mutating
    // these between calls is safe and avoids one allocation per record.
    private final Text outKey = new Text();
    private final LongWritable outValue = new LongWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\\s+");
        // Skip structurally malformed lines (e.g. the header row) instead of
        // failing the task. Unlike the original broad catch(Exception), this
        // does NOT swallow IOException/InterruptedException from context.write.
        if (fields.length < 4) {
            return;
        }
        long upFlow;
        long downFlow;
        try {
            upFlow = Long.parseLong(fields[2]);
            downFlow = Long.parseLong(fields[3]);
        } catch (NumberFormatException ignored) {
            // Non-numeric traffic columns (header line or corrupt record) — skip.
            return;
        }
        outKey.set(fields[0]);
        outValue.set(upFlow + downFlow);
        context.write(outKey, outValue);
    }
}
2 FlowReducer
package com._51doit.flow;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer for the flow-summary job.
 *
 * <p>Receives all per-record flow values for one phone number and writes
 * {@code (phone, total flow)}.
 *
 * @author 多易教育-行哥 (Duoyi Education)
 * @since 2020/7/10
 */
public class FlowReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Accumulate the grouped per-record totals for this phone number.
        long sum = 0L;
        for (LongWritable v : values) {
            sum += v.get();
        }
        context.write(key, new LongWritable(sum));
    }
}
3 FlowDriver
package com._51doit.flow;
import com._51doit.mr.WordCountMapper;
import com._51doit.mr.WordCountReducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec;
import java.io.IOException;
/**
 * Driver: configures and submits the flow-summary MapReduce job.
 *
 * <p>Usage: {@code FlowDriver [inputPath] [outputPath]}. When no arguments
 * are given, the original hard-coded local paths are used, so existing
 * invocations keep working.
 *
 * @author 多易教育-行哥 (Duoyi Education)
 * @since 2020/7/10
 */
public class FlowDriver {
    public static void main(String[] args) throws Exception {
        // 1. Job configuration.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "flow-sum"); // was "wc" — stale copy-paste name
        // Required on a real cluster so Hadoop can locate the jar containing
        // the mapper/reducer classes (was missing in the original).
        job.setJarByClass(FlowDriver.class);

        // 2. Mapper/reducer classes and their output key-value types.
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Number of reduce tasks defaults to 1; uncomment to change.
        // job.setNumReduceTasks(2);

        // 3. Input/output paths — overridable from the command line.
        // NOTE(review): the original input path "D:\data\htt\" looks like a
        // typo for "D:\data\http\" — confirm against the actual data directory.
        String input = args.length > 0 ? args[0] : "D:\\data\\htt\\";
        String output = args.length > 1 ? args[1] : "D:\\data\\http\\res";
        FileInputFormat.setInputPaths(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // 4. Submit, wait for completion, and reflect success in the exit code.
        boolean ok = job.waitForCompletion(true);
        System.exit(ok ? 0 : -1);
    }
}