Implementing a MapReduce job that reads a file from HDFS, aggregates it, and writes the result back to HDFS

Requirement

For each mobile phone number, compute the sum of its upstream traffic, the sum of its downstream traffic, the sum of its total upstream traffic, and the sum of its total downstream traffic.
Analysis: use the mobile phone number as the key and the four fields (upstream traffic, downstream traffic, total upstream traffic, total downstream traffic) as the value; this key/value pair is the output of the map stage and the input of the reduce stage.
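The original post does not show the input file layout, but from the indices used in the mapper below (the phone number at column 1, the four traffic fields at columns 6 to 9), a hypothetical tab-separated input line would look like this, with the angle-bracket columns standing in for fields the job ignores:

<id>	13726230503	<mac>	<ip>	<host>	<port>	24	27	2481	24681	<status>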

Build the POJO object

The bean must implement Hadoop's Writable interface so it can be serialized when it is shuffled from the map stage to the reduce stage.

package Flow;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * POJO that carries the four traffic fields for one phone number. It
 * implements Hadoop's Writable interface so the framework can serialize it
 * between the map and reduce stages.
 *
 * @author SKY
 */
public class FlowBean implements Writable {

    private Integer upFlow;
    private Integer downFlow;
    private Integer upCountFlow;
    private Integer downCountFlow;

    // Serialization: called by the framework when the bean is shuffled.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(upFlow);
        out.writeInt(downFlow);
        out.writeInt(upCountFlow);
        out.writeInt(downCountFlow);
    }

    // Deserialization: fields must be read in exactly the order write() wrote them.
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readInt();
        this.downFlow = in.readInt();
        this.upCountFlow = in.readInt();
        this.downCountFlow = in.readInt();
    }

    // Writable implementations need a public no-arg constructor so the
    // framework can instantiate them reflectively.
    public FlowBean() {
    }

    public FlowBean(Integer upFlow, Integer downFlow, Integer upCountFlow, Integer downCountFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.upCountFlow = upCountFlow;
        this.downCountFlow = downCountFlow;
    }

    public Integer getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(Integer upFlow) {
        this.upFlow = upFlow;
    }

    public Integer getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(Integer downFlow) {
        this.downFlow = downFlow;
    }

    public Integer getUpCountFlow() {
        return upCountFlow;
    }

    public void setUpCountFlow(Integer upCountFlow) {
        this.upCountFlow = upCountFlow;
    }

    public Integer getDownCountFlow() {
        return downCountFlow;
    }

    public void setDownCountFlow(Integer downCountFlow) {
        this.downCountFlow = downCountFlow;
    }

    // toString() determines how the bean is rendered in the job's text output.
    @Override
    public String toString() {
        return "FlowBean{" +
                "upFlow=" + upFlow +
                ", downFlow=" + downFlow +
                ", upCountFlow=" + upCountFlow +
                ", downCountFlow=" + downCountFlow +
                '}';
    }
}
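
A quick local sanity check (not part of the original post) can exercise the Writable contract above: readFields must consume the fields in exactly the order write produced them. The class name FlowBeanRoundTrip is hypothetical:

package Flow;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {

    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(10, 20, 30, 40);

        // Serialize the bean the same way the framework would during the shuffle.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh bean and confirm the fields survived.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(copy);
        // Prints: FlowBean{upFlow=10, downFlow=20, upCountFlow=30, downCountFlow=40}
    }
}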

Main implementation

package Flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author SKY
 */
public class FlowBeanMain extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(super.getConf(), FlowBeanMain.class.getSimpleName());
        job.setJarByClass(FlowBeanMain.class);

        // Input: read text lines from HDFS.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://192.168.52.100:8020/in"));

        // Map stage: phone number -> FlowBean.
        job.setMapperClass(FlowMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // Reduce stage: sum the four traffic fields per phone number.
        job.setReducerClass(FlowReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Output: write text back to HDFS; the output directory must not already exist.
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("hdfs://192.168.52.100:8020/opartice"));

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Propagate the job's exit status to the shell instead of discarding it.
        System.exit(ToolRunner.run(new Configuration(), new FlowBeanMain(), args));
    }
}
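
Assuming the project is packaged as flow.jar (the jar name is hypothetical), the driver can be submitted with the standard hadoop CLI:

hadoop jar flow.jar Flow.FlowBeanMain

Because the per-key reduction is a plain sum, FlowReducer could also be registered as a combiner with job.setCombinerClass(FlowReducer.class) to cut shuffle traffic; the original post does not do this, so treat it as an optional tweak.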

Mapper implementation

package Flow;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    // One reusable output bean per mapper; this is safe because
    // context.write() serializes the bean immediately.
    private FlowBean flowBean = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The record is tab-separated: column 1 holds the phone number,
        // columns 6-9 hold the four traffic fields.
        String[] split = value.toString().split("\t");
        flowBean.setUpFlow(Integer.parseInt(split[6]));
        flowBean.setDownFlow(Integer.parseInt(split[7]));
        flowBean.setUpCountFlow(Integer.parseInt(split[8]));
        flowBean.setDownCountFlow(Integer.parseInt(split[9]));
        context.write(new Text(split[1]), flowBean);
    }
}
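
For the hypothetical input line shown earlier, this mapper would emit the pair (13726230503, FlowBean{upFlow=24, downFlow=27, upCountFlow=2481, downCountFlow=24681}).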

Reducer implementation

package Flow;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    // One reusable output bean; context.write() serializes it before the next call.
    private FlowBean flowBean = new FlowBean();

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Accumulate the four fields across every record for this phone number.
        int upFlow = 0;
        int downFlow = 0;
        int upCountFlow = 0;
        int downCountFlow = 0;
        for (FlowBean value : values) {
            upFlow += value.getUpFlow();
            downFlow += value.getDownFlow();
            upCountFlow += value.getUpCountFlow();
            downCountFlow += value.getDownCountFlow();
        }
        flowBean.setUpFlow(upFlow);
        flowBean.setDownFlow(downFlow);
        flowBean.setUpCountFlow(upCountFlow);
        flowBean.setDownCountFlow(downCountFlow);
        context.write(key, flowBean);
    }
}
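
TextOutputFormat writes each pair as the key, a tab, and the value's toString(). For the single hypothetical input line above, the output file would therefore contain:

13726230503	FlowBean{upFlow=24, downFlow=27, upCountFlow=2481, downCountFlow=24681}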
