MapReduce partition sorting case

Case requirements:

Input data:
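
Each line is assumed to be tab-separated as phone number, upstream traffic, downstream traffic (matching what MyMapper parses below). A few hypothetical sample lines:

18611781163	1116	954
15837312345	300	720
13956435636	132	1512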

Requirements:

Calculate the total traffic (upstream plus downstream) of each row, write records whose phone numbers share the same first three digits to the same output file, and sort each file by total traffic in descending order.

Output result:

Three output files, one per partition: phone numbers starting with 186, those starting with 158, and all others.

Dependencies (pom.xml):

<dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>2.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>2.10.0</version>
        </dependency>

    </dependencies>

FlowBean class:

The main task is to override the compareTo method to implement the sort order.

Note that the default sort is performed on the key emitted by the map, so the class to be sorted must be used as the map output key.

The interface implemented is WritableComparable, not WritableComparator; don't mix them up.

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// The key must implement WritableComparable so it can be serialized and sorted
public class FlowBean implements WritableComparable<FlowBean> {
    long phone;
    long upFlow;
    long downFlow;
    long totalFlow;

    public FlowBean() {
        super();
    }


    // Serialization: write the fields in a fixed order
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(phone);
        dataOutput.writeLong(upFlow);
        dataOutput.writeLong(downFlow);
        dataOutput.writeLong(totalFlow);
    }

    // Deserialization: read the fields in the same order they were written
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        phone = dataInput.readLong();
        upFlow = dataInput.readLong();
        downFlow = dataInput.readLong();
        totalFlow = dataInput.readLong();
    }

    @Override
    public String toString() {
        return phone+"\t"+upFlow+"\t"+downFlow+"\t"+totalFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getTotalFlow() {
        return totalFlow;
    }

    public void setTotalFlow(long totalFlow) {
        this.totalFlow = totalFlow;
    }

    public long getPhone() {
        return phone;
    }

    public void setPhone(long phone) {
        this.phone = phone;
    }

    // Sort by total traffic in descending order
    @Override
    public int compareTo(FlowBean o) {
        if (totalFlow > o.getTotalFlow())
            return -1;
        else if (totalFlow < o.getTotalFlow())
            return 1;
        else
            return 0;
    }
}
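
A minimal sketch of how the descending order behaves (the beans and totals are made up for illustration):

        FlowBean a = new FlowBean();
        a.setTotalFlow(2070);
        FlowBean b = new FlowBean();
        b.setTotalFlow(1020);
        // a has the larger total, so it sorts first
        System.out.println(a.compareTo(b));    // prints -1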

MyMapper class:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class MyMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line: phone \t upFlow \t downFlow
        String line = value.toString();
        String[] s = line.split("\\t");

        FlowBean k = new FlowBean();
        k.setPhone(Long.parseLong(s[0]));
        k.setUpFlow(Long.parseLong(s[1]));
        k.setDownFlow(Long.parseLong(s[2]));
        // Total traffic = upstream + downstream
        k.setTotalFlow(k.getDownFlow() + k.getUpFlow());

        // Use the bean as the key so the framework sorts by it
        context.write(k, NullWritable.get());
    }
}

MyReducer class:

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyReducer extends Reducer<FlowBean,NullWritable, FlowBean,NullWritable> {

    @Override
    protected void reduce(FlowBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Beans with equal total traffic compare as equal and arrive grouped under one key;
        // write once per value so records with the same total are not lost
        for (NullWritable value : values) {
            context.write(key, value);
        }
    }
}

MyPartitioner class:

The Partitioner's generic parameters must match the map output key and value types.

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPartitioner extends Partitioner<FlowBean,NullWritable> {


    @Override
    public int getPartition(FlowBean flowBean, NullWritable nullWritable, int numPartitions) {
        // Route by the first three digits of the phone number
        // (comparing the full number against 186 or 158 would never match)
        String prefix = String.valueOf(flowBean.getPhone()).substring(0, 3);
        int partition;
        if ("186".equals(prefix))
            partition = 0;
        else if ("158".equals(prefix))
            partition = 1;
        else
            partition = 2;
        return partition;
    }
}
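
With the prefix logic above, hypothetical numbers would be routed as follows: 18611781163 goes to partition 0 (prefix 186), 15837312345 to partition 1 (prefix 158), and 13956435636 to partition 2 (any other prefix).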

MyDriver class:

The key points for partitioning:

job.setPartitionerClass(MyPartitioner.class); sets the custom partitioner class

job.setNumReduceTasks(3); sets the number of reduce tasks, which must match the number of partitions (one output file per reduce task)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MyDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(MyDriver.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // Map output types default to the job output types set below,
        // so setMapOutputKeyClass / setMapOutputValueClass are not needed here

        job.setOutputKeyClass(FlowBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(MyPartitioner.class);
        job.setNumReduceTasks(3);


        FileInputFormat.addInputPath(job, new Path("/home/hadoop/temp/phone_info.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/hadoop/temp/phone_info_Partition"));

        // Delete any existing output directory first, otherwise the job fails
        FileSystem.get(conf).delete(new Path("/home/hadoop/temp/phone_info_Partition"), true);

        boolean b = job.waitForCompletion(true);

        System.exit(b?0:1);
    }
}
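
To try the job, package the classes into a jar and submit it with the hadoop command; a sketch, assuming the jar is named flowsort.jar and MyDriver is in the default package:

hadoop jar flowsort.jar MyDriver

The input and output paths are hard-coded in the driver, so no further arguments are needed.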

Origin blog.csdn.net/qq_52135683/article/details/126633650