Data
Phone           URL                                      Up-flow  Down-flow
13026230503 http://v.baidu.com 20 5000
13826544101 http://www.weibo.com 20 5000
13926435656 http://v.baidu.com/tv 20 5000
13560439658 http://www.edu360.cn 10 4000
13926251106 https://www.jianshu.com/p/bb88f7520b33 70 3000
18211575961 http://weibo.com/?category=1760 10 100
15920133257 https://www.jianshu.com/p/bb88f7520b9e 20 3000
Requirement
Compute the total traffic (up-flow plus down-flow) for each phone number.
Approach: use the phone number as the KEY and the total traffic of each record as the VALUE; the framework then groups records by phone number, and the reducer sums the per-record totals.
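For example, the first record becomes the pair (13026230503, 20 + 5000 = 5020) on the map side; the reduce side then adds up every value that arrives under the same phone number.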
1 FlowMapper
package com._51doit.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: Emits one (phone number, up-flow + down-flow) pair per input record.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        try {
            // Fields are separated by arbitrary whitespace: phone, URL, up-flow, down-flow
            String[] split = value.toString().split("\\s+");
            String tel = split[0];
            String upFlowStr = split[2];
            String downFlowStr = split[3];
            long upFlow = Long.parseLong(upFlowStr);
            long downFlow = Long.parseLong(downFlowStr);
            long flow = upFlow + downFlow;
            context.write(new Text(tel), new LongWritable(flow));
        } catch (Exception e) {
            // Malformed lines (such as the header row) fail to parse and are skipped
            e.printStackTrace();
        }
    }
}
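To sanity-check the mapper without launching a full job, one option is an MRUnit-style unit test. The following is a minimal sketch, not part of the original tutorial: it assumes Apache MRUnit (hadoop2 classifier) and JUnit 4 are on the classpath, the test class lives in the same package as FlowMapper, and FlowMapperTest is a name invented here.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class FlowMapperTest {
    @Test
    public void emitsPhoneAndTotalFlow() throws Exception {
        // Feed one raw input line and assert that (phone, up + down) comes out
        MapDriver.newMapDriver(new FlowMapper())
                .withInput(new LongWritable(0), new Text("13026230503 http://v.baidu.com 20 5000"))
                .withOutput(new Text("13026230503"), new LongWritable(5020))
                .runTest();
    }
}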
2 FlowReducer
package com._51doit.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: Sums all per-record flow values for one phone number.
 */
public class FlowReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long totalFlow = 0L;
        // values holds every flow value the mappers emitted for this phone number
        for (LongWritable value : values) {
            totalFlow += value.get();
        }
        context.write(key, new LongWritable(totalFlow));
    }
}
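Because addition is associative and commutative, and FlowReducer's input key/value types (Text, LongWritable) are identical to its output types, the same class can also serve as a combiner that pre-aggregates map output locally and shrinks the shuffle. This is optional and not in the original code; a single extra line in the FlowDriver below would enable it:

// Optional: pre-aggregate per-phone totals on the map side before the shuffle
job.setCombinerClass(FlowReducer.class);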
3 FlowDriver
package com._51doit.flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: Configures and submits the flow-statistics job.
 */
public class FlowDriver {
    public static void main(String[] args) throws Exception {
        // 1. Configuration object
        Configuration conf = new Configuration();
        // 2. Create the job
        Job job = Job.getInstance(conf, "flow");
        job.setJarByClass(FlowDriver.class); // needed when the job is submitted to a cluster
        // 2.1 Set the mapper and reducer classes
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);
        // 2.2 Set the map and reduce output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 2.3 Set the number of reduce tasks (the default is 1)
        // job.setNumReduceTasks(2);
        // 2.4 Set the input and output paths (the output directory must not exist yet)
        FileInputFormat.setInputPaths(job, new Path("D:\\data\\http\\"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\data\\http\\res"));
        // 3. Submit the job and wait for completion; the return value signals success
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : -1);
    }
}
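Running the driver against the sample data produces one result file (part-r-00000) under D:\data\http\res. Keys are emitted in lexicographic order, and the default key/value separator of TextOutputFormat is a tab, so the output should read:

13026230503	5020
13560439658	4010
13826544101	5020
13926251106	3070
13926435656	5020
15920133257	3020
18211575961	110

The header row of the input file fails to parse in FlowMapper and is skipped by the try/catch, so it does not appear in the result.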