MapReduce(4)--MapReduce 统计求和,排序

1.统计求和,需求:

统计每个手机号的上行流量总和,下行流量总和,上行总流量之和,下行总流量之和

文件:

13631579850	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	游戏娱乐	24	27	2481	24681	200
13631579950	13826544101	5C-0E-8B-C7-F1-E0:CMCC	120.197.40.4	jd.com	京东购物	4	0	264	0	200
13631579910	13926435656	20-10-7A-28-CC-0A:CMCC	120.196.100.99	taobao.com	淘宝购物	2	4	132	1512	200
13631544000	13926251106	5C-0E-8B-8B-B1-50:CMCC	120.197.40.4	cnblogs.com	技术门户	4	0	240	0	200
13631579930	18211575961	94-71-AC-CD-E6-18:CMCC-EASY	120.196.100.99	iface.qiyi.com	视频网站	15	12	1527	2106	200
13631579950	84138413	5C-0E-8B-8C-E8-20:7DaysInn	120.197.40.4	122.72.52.12	未知	20	16	4116	1432	200
13631579930	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	sougou.com	综合门户	18	15	1116	954	200
13631579950	15920133257	5C-0E-8B-C7-BA-20:CMCC	120.197.40.4	sug.so.360.cn	信息安全	20	20	3156	2936	200
13631579830	13719199419	68-A1-B7-03-07-B1:CMCC-EASY	120.196.100.82	baidu.com	综合搜索	4	0	240	0	200
13631579840	13660577991	5C-0E-8B-92-5C-20:CMCC-EASY	120.197.40.4	s19.cnzz.com	站点统计	24	9	6960	690	200
13631579730	15013685858	5C-0E-8B-C7-F7-90:CMCC	120.197.40.4	rank.ie.sogou.com	搜索引擎	28	27	3659	3538	200
13631579860	15989002119	E8-99-C4-4E-93-E0:CMCC-EASY	120.196.100.99	www.umeng.com	站点统计	3	3	1938	180	200
13631579920	13560439658	C4-17-FE-BA-DE-D9:CMCC	120.196.100.99	zhilian.com	招聘门户	15	9	918	4938	200
13631579860	13480253104	5C-0E-8B-C7-FC-80:CMCC-EASY	120.197.40.4	csdn.net	技术门户	3	3	180	180	200
13631579840	13602846565	5C-0E-8B-8B-B6-00:CMCC	120.197.40.4	2052.flash2-http.qq.com	综合门户	15	12	1938	2910	200
13631579950	13922314466	00-FD-07-A2-EC-BA:CMCC	120.196.100.82	img.qfc.cn	图片大全	12	12	3008	3720	200
13631579820	13502468823	5C-0A-5B-6A-0B-D4:CMCC-EASY	120.196.100.99	y0.ifengimg.com	综合门户	57	102	7335	110349	200
13631579860	18320173382	84-25-DB-4F-10-1A:CMCC-EASY	120.196.100.99	input.shouji.sogou.com	搜索引擎	21	18	9531	2412	200
13631579900	13925057413	00-1F-64-E1-E6-9A:CMCC	120.196.100.55	t3.baidu.com	搜索引擎	69	63	11058	48243	200
13631579880	13760778710	00-FD-07-A4-7B-08:CMCC	120.196.100.82	http://youku.com/	视频网站	2	2	120	120	200
13631579850	13823070001	20-7C-8F-70-68-1F:CMCC	120.196.100.99	img.qfc.cn	图片浏览	6	3	360	180	200
13631579850	13600217502	00-1F-64-E2-E8-B1:CMCC	120.196.100.55	www.baidu.com	综合门户	18	138	1080	186852	200

2.分析:

以手机号码作为key值,上行流量,下行流量,上行总流量,下行总流量四个字段作为value值,然后以这个key,和value作为map阶段的输出,reduce阶段的输入

3.代码:

map

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that turns one tab-separated traffic record into a
 * {@code (Text, FlowBean)} pair.
 *
 * <p>The second column of the record becomes the output key (presumably the
 * subscriber's phone number — confirm against the data set), and columns 6-9
 * are parsed as integers into the four counters of a {@link FlowBean}.
 *
 * @author kismet
 */
public class WordCountMap extends Mapper<LongWritable, Text, Text, FlowBean> {

    /** Zero-based column holding the value used as the output key. */
    private static final int KEY_COLUMN = 1;
    /** Zero-based column parsed into {@code upFlow}. */
    private static final int UP_FLOW_COLUMN = 6;
    /** Zero-based column parsed into {@code downFlow}. */
    private static final int DOWN_FLOW_COLUMN = 7;
    /** Zero-based column parsed into {@code upCountFlow}. */
    private static final int UP_COUNT_FLOW_COLUMN = 8;
    /** Zero-based column parsed into {@code downCountFlow}. */
    private static final int DOWN_COUNT_FLOW_COLUMN = 9;

    /**
     * Splits the input line on tab characters and emits the key column together
     * with a freshly populated {@link FlowBean}.
     *
     * @param key     input key supplied by the input format (not used here)
     * @param value   one line of the input file
     * @param context sink for the emitted key/value pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final String line = value.toString();
        final String[] columns = line.split("\t");

        // Counters are filled in column order, before the output key is built.
        final FlowBean counters = new FlowBean();
        counters.setUpFlow(Integer.parseInt(columns[UP_FLOW_COLUMN]));
        counters.setDownFlow(Integer.parseInt(columns[DOWN_FLOW_COLUMN]));
        counters.setUpCountFlow(Integer.parseInt(columns[UP_COUNT_FLOW_COLUMN]));
        counters.setDownCountFlow(Integer.parseInt(columns[DOWN_COUNT_FLOW_COLUMN]));

        final Text outputKey = new Text(columns[KEY_COLUMN]);
        context.write(outputKey, counters);
    }
}

reduce

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that sums the four traffic counters of every {@link FlowBean}
 * received for a key and emits a single {@link FlowBean} holding the totals.
 *
 * @author kismet
 */
public class WordCountReduce extends Reducer<Text, FlowBean, Text, FlowBean> {

    /**
     * Adds up {@code upFlow}, {@code downFlow}, {@code upCountFlow} and
     * {@code downCountFlow} over all values of the key and writes one result.
     *
     * @param key     the grouping key produced by the mapper
     * @param values  all beans emitted for this key
     * @param context sink for the aggregated key/value pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulators: avoids unboxing/re-boxing an Integer on every addition.
        int upFlow = 0;
        int downFlow = 0;
        int upCountFlow = 0;
        int downCountFlow = 0;
        for (FlowBean value : values) {
            upFlow += value.getUpFlow();
            downFlow += value.getDownFlow();
            upCountFlow += value.getUpCountFlow();
            downCountFlow += value.getDownCountFlow();
        }

        FlowBean flowBean = new FlowBean();
        flowBean.setUpFlow(upFlow);
        flowBean.setDownFlow(downFlow);
        flowBean.setUpCountFlow(upCountFlow);
        flowBean.setDownCountFlow(downCountFlow);
        context.write(key, flowBean);
    }
}

driver

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


/**
 * Driver that configures and submits the traffic-summing MapReduce job
 * ({@link WordCountMap} followed by {@link WordCountReduce}).
 *
 * @author kismet
 */
public class WordCountDriver extends Configured implements Tool {

    /**
     * Builds the job, waits for it to finish and reports its outcome.
     *
     * @param args command-line arguments left over after generic option parsing (not used)
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the job cannot be created, submitted or monitored
     */
    @Override
    public int run(String[] args) throws Exception {
        // Prefer the configuration injected through setConf (ToolRunner does this) so that
        // generic options are honoured; fall back to a fresh one when run() is called directly.
        Configuration conf = getConf() != null ? getConf() : new Configuration();
        Job job = Job.getInstance(conf, "www");

        job.setJarByClass(WordCountDriver.class);

        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("E:\\第三学期\\第二阶段\\day22\\4\\流量统计\\input"));

        job.setMapperClass(WordCountMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        // Declares the reducer's value type; the original repeated setMapOutputValueClass here,
        // which left the job's final output value class undeclared.
        job.setOutputValueClass(FlowBean.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        // NOTE(review): the output directory is nested inside the input directory — confirm this is intended.
        TextOutputFormat.setOutputPath(job, new Path("E:\\第三学期\\第二阶段\\day22\\4\\流量统计\\input\\a"));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Runs the driver through {@link ToolRunner} and exits the JVM with the job's status code.
     *
     * @param args command-line arguments, passed to {@link ToolRunner}
     * @throws Exception if the tool fails with an exception
     */
    public static void main(String[] args) throws Exception {
        // Propagate the job result to the caller instead of discarding it.
        System.exit(ToolRunner.run(new WordCountDriver(), args));
    }
}

对结果进行排序

1.定义Flow类(对应前文的FlowBean),实现WritableComparable接口,通过compareTo方法实现比较排序

java 的compareTo方法说明

compareTo 方法用于将当前对象与方法的参数进行比较。

如果当前对象与参数相等,返回 0。

如果当前对象小于参数,返回负数(通常为 -1)。

如果当前对象大于参数,返回正数(通常为 1)。

例如:o1.compareTo(o2);

返回正数的话,当前对象(调用 compareTo 方法的对象 o1)要排在比较对象(compareTo 传参对象 o2)后面,返回负数的话,放在前面。

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @author kismet
 * @date 2019-11-17 14:55
 */
public class Flow implements WritableComparable <Flow>{
    private Integer upFlow;
    private Integer  downFlow;
    private Integer upCountFlow;
    private Integer downCountFlow;

    @Override
    public String toString() {
        return "Flow{" +
                "upFlow=" + upFlow +
                ", downFlow=" + downFlow +
                ", upCountFlow=" + upCountFlow +
                ", downCountFlow=" + downCountFlow +
                '}';
    }

    public Integer getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(Integer upFlow) {
        this.upFlow = upFlow;
    }

    public Integer getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(Integer downFlow) {
        this.downFlow = downFlow;
    }

    public Integer getUpCountFlow() {
        return upCountFlow;
    }

    public void setUpCountFlow(Integer upCountFlow) {
        this.upCountFlow = upCountFlow;
    }

    public Integer getDownCountFlow() {
        return downCountFlow;
    }

    public void setDownCountFlow(Integer downCountFlow) {
        this.downCountFlow = downCountFlow;
    }

    public Flow() {
    }

    public Flow(Integer upFlow, Integer downFlow, Integer upCountFlow, Integer downCountFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.upCountFlow = upCountFlow;
        this.downCountFlow = downCountFlow;
    }

    @Override
    public int compareTo(Flow o) {
        return this.upCountFlow > o.upCountFlow?-1:1;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(upFlow);
        out.writeInt(downFlow);
        out.writeInt(upCountFlow);
        out.writeInt(downCountFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readInt();
        this.downFlow = in.readInt();
        this.upCountFlow = in.readInt();
        this.downCountFlow = in.readInt();
    }

map、reduce、driver 的代码与前面统计求和部分的写法没有区别。

发布了80 篇原创文章 · 获赞 168 · 访问量 8万+

猜你喜欢

转载自blog.csdn.net/weixin_44036154/article/details/103131045