Hadoop Detailed Notes (10): A MapReduce Data Analysis Case on Traffic Statistics

Data

Columns: mobile phone number | URL | upstream traffic | downstream traffic

13026230503    http://v.baidu.com 20 5000
13826544101    http://www.weibo.com 20 5000
13926435656    http://v.baidu.com/tv 20 5000
13560439658    http://www.edu360.cn 10 4000
13926251106    https://www.jianshu.com/p/bb88f7520b33 70 3000
18211575961    http://weibo.com/?category=1760 10 100
15920133257    https://www.jianshu.com/p/bb88f7520b9e 20 3000

Requirement

Count the total traffic (upstream + downstream) of each user, identified by phone number.

The idea: use the mobile phone number field as the map output key and the total traffic of each record (upstream + downstream) as the value; the reducer then sums the values for each key.
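For example (derived from the first sample record, 20 + 5000 = 5020), the map phase emits pairs like:

13026230503    http://v.baidu.com 20 5000   ->   (13026230503, 5020)

The shuffle phase then groups the pairs by phone number, so each reduce call receives one number together with all of its per-record traffic values.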

1 FlowMapper

package com._51doit.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: Mapper that parses one log record and emits (phone number, total traffic)
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        try {
            // Split the record on whitespace: phone number, URL, upstream traffic, downstream traffic
            String[] split = value.toString().split("\\s+");
            String tel = split[0];
            String upFlowStr = split[2];
            String downFlowStr = split[3];
            long upFlow = Long.parseLong(upFlowStr);
            long downFlow = Long.parseLong(downFlowStr);
            // Total traffic for this record
            long flow = upFlow + downFlow;
            // Emit the phone number as the key and this record's total traffic as the value
            context.write(new Text(tel), new LongWritable(flow));
        } catch (Exception e) {
            // Skip malformed records instead of failing the whole task
            e.printStackTrace();
        }
    }
}
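To sanity-check the mapper in isolation, a unit test is handy. The sketch below is not part of the original post; it assumes the MRUnit library (org.apache.mrunit) and JUnit are on the classpath, and feeds FlowMapper the first sample record:

package com._51doit.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

// Hypothetical test class, assuming MRUnit's MapDriver API
public class FlowMapperTest {
    @Test
    public void mapsOneRecord() throws Exception {
        MapDriver.newMapDriver(new FlowMapper())
                .withInput(new LongWritable(0), new Text("13026230503 http://v.baidu.com 20 5000"))
                .withOutput(new Text("13026230503"), new LongWritable(5020)) // 20 + 5000
                .runTest();
    }
}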

2 FlowReducer

package com._51doit.flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: Reducer that sums the per-record traffic values for each phone number
 */
public class FlowReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long totalFlow = 0L;
        // Accumulate every traffic value recorded for this phone number
        for (LongWritable value : values) {
            totalFlow += value.get();
        }
        // Emit (phone number, total traffic)
        context.write(key, new LongWritable(totalFlow));
    }
}
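Because the reduce logic is a plain sum, which is associative and commutative, the same class can double as a combiner, pre-aggregating map output on each map task to cut shuffle traffic. One optional driver line enables this; it is an optimization the original post does not use:

// Optional: pre-aggregate per-map-task sums before the shuffle
job.setCombinerClass(FlowReducer.class);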

 

3 FlowDriver

package com._51doit.flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @Author: 多易教育-行哥
 * @Date: 2020/7/10
 * @Description: Driver that configures and submits the traffic-statistics job
 */
public class FlowDriver {
    public static void main(String[] args) throws Exception {
        // 1 Configuration object
        Configuration conf = new Configuration();
        // 2 Create the job
        Job job = Job.getInstance(conf, "flow");
        // 2.1 Set the map and reduce task classes
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);
        // 2.2 Set the map and reduce output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 2.3 Set the number of reduce tasks (default is 1)
        // job.setNumReduceTasks(2);
        // 2.4 Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path("D:\\data\\htt\\"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\data\\http\\res"));
        // 3 Submit the job, wait for it to finish, and exit with its success status
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : -1);
    }
}
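Running the job on the sample data should leave a part-r-00000 file in the output directory with contents along these lines: every phone number appears only once in the input, so each total is just that record's upstream plus downstream traffic, and the Text keys come out in lexicographic order.

13026230503	5020
13560439658	4010
13826544101	5020
13926251106	3070
13926435656	5020
15920133257	3020
18211575961	110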

 

Origin blog.csdn.net/qq_37933018/article/details/107247702