Hadoop Detailed Notes (10): MapReduce Data Analysis Case - Traffic Statistics

Data

Phone number       URL      Upstream traffic     Downstream traffic

13026230503    http://v.baidu.com 20 5000
13826544101    http://www.weibo.com 20 5000
13926435656    http://v.baidu.com/tv 20 5000
13560439658    http://www.edu360.cn 10 4000
13926251106    https://www.jianshu.com/p/bb88f7520b33 70 3000
18211575961    http://weibo.com/?category=1760 10 100
15920133257    https://www.jianshu.com/p/bb88f7520b9e 20 3000

Requirement

Compute the total traffic for each person (each phone number).

Approach: use the phone number field as the key and the total traffic (upstream + downstream) of each record as the value.
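For the sample data above, each map call emits (phone number, upstream + downstream traffic), and the reducer sums those values per phone number. Every phone number appears only once in this sample, so the expected output (one reducer, keys sorted lexicographically) would be:

13026230503	5020
13560439658	4010
13826544101	5020
13926251106	3070
13926435656	5020
15920133257	3020
18211575961	110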

1 FlowMapper

package com._51doit.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: parses one line of the traffic log and emits (phone number, upstream + downstream traffic)
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        try {
            // fields: phone number, URL, upstream traffic, downstream traffic
            String[] split = value.toString().split("\\s+");
            String tel = split[0];
            String upFlowStr = split[2];
            String downFlowStr = split[3];
            long upFlow = Long.parseLong(upFlowStr);
            long downFlow = Long.parseLong(downFlowStr);
            long flow = upFlow + downFlow;
            // emit (phone number, total traffic of this record)
            context.write(new Text(tel), new LongWritable(flow));
        } catch (Exception e) {
            // skip malformed records
            e.printStackTrace();
        }
    }
}
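As a quick sanity check (not part of the original post), the same split-and-parse steps can be run on one sample record outside of Hadoop; the record and the expected total are taken from the data above:

public class FlowLineParseCheck {
    public static void main(String[] args) {
        // one record from the sample data, fields separated by whitespace
        String line = "13026230503 http://v.baidu.com 20 5000";
        String[] split = line.split("\\s+");       // same regex as FlowMapper
        String tel = split[0];                     // phone number
        long upFlow = Long.parseLong(split[2]);    // upstream traffic
        long downFlow = Long.parseLong(split[3]);  // downstream traffic
        System.out.println(tel + " " + (upFlow + downFlow)); // prints: 13026230503 5020
    }
}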

2 FlowReducer

package com._51doit.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: sums the per-record traffic values for each phone number
 */
public class FlowReducer extends Reducer<Text , LongWritable , Text , LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // sum all per-record traffic values for this phone number
        long totalFlow = 0L;
        for (LongWritable value : values) {
            totalFlow += value.get();
        }
        context.write(key, new LongWritable(totalFlow));
    }
}
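Because the per-key sum is associative and commutative, the same reducer class can also be registered as a combiner so that map output is pre-aggregated before the shuffle. This is an optional optimization, not part of the original driver below; it would be one extra line in FlowDriver:

// optional: pre-aggregate map output locally before the shuffle
job.setCombinerClass(FlowReducer.class);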

3 FlowDriver

package com._51doit.flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: configures and submits the traffic-statistics MapReduce job
 */
public class FlowDriver {
    public static void main(String[] args) throws Exception {
        // 1. Configuration object
        Configuration conf = new Configuration();
        // 2. Create the job object
        Job job = Job.getInstance(conf, "flow");
        // 2.1 Set the Mapper and Reducer classes
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);
        // 2.2 Set the map and reduce output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 2.3 Set the number of reducers (default is 1)
        // job.setNumReduceTasks(2);
        // 2.4 Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path("D:\\data\\http\\"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\data\\http\\res"));
        // 3. Submit the job, wait for completion, and exit with its status
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : -1);

    }
}
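One practical note: FileOutputFormat fails the job if the output directory already exists. A common pattern, sketched here as an optional addition to the driver (it needs import org.apache.hadoop.fs.FileSystem;), is to delete the old output before setting the path:

// optional sketch: remove a stale output directory before submitting the job
Path outPath = new Path("D:\\data\\http\\res");
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outPath)) {
    fs.delete(outPath, true);   // true = delete recursively
}
FileOutputFormat.setOutputPath(job, outPath);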

Reposted from blog.csdn.net/qq_37933018/article/details/107247702