MapperReduce序列化作业(二)——排序

1数据源 :上次关于手机流量简单统计业务的product

2.要求  :根据总流量得值进行倒序排序,然后得到输出

3.大概逻辑

(1)FlowSort 类:进行序列化和反序列化,排序逻辑接口实现

(2)FlowSortMapper 类:对数据进行封装

(3)FlowSortReducer类:将key和value进行对调然后封装写入(对调位置是因为只有key(形参)是可以排序的,value就不行)

(4)FlowSortDriver类; 进行driver操作(在代码下面有具体注释) 这里会把输出类型的时候把k和value对调回来

4.代码

(1)FlowSort

package flowsort;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowSort implements WritableComparable<FlowSort> {
    //上flow流量
    private long upFlow;
    //下flow流量
    private long downFlow;
    //sum流量
    private long sumFlow;
    //空参构造
    public FlowSort() {
    }

    @Override
    public String toString() {
        return upFlow +"\t"
                 +downFlow +"\t"+
                sumFlow
                ;
    }

    //序列化方法
    public void write(DataOutput out) throws IOException {

       out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }
    //反序列化方法
    public void readFields(DataInput in) throws IOException {
         this.upFlow=in.readLong();
         this.downFlow=in.readLong();
         this.sumFlow=in.readLong();
    }

    //排序逻辑
    public int compareTo(FlowSort o) {
        int result=0;
        if (sumFlow>o.getSumFlow()) {
            result = -1;
        }else if (sumFlow<o.getSumFlow()){
            result=1;

        }else{
            result=0;        }
        return result;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }
}

  

(2)FlowSortMapper

package flowsort;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FlowSortMapper extends Mapper<LongWritable, Text,FlowSort,Text> {

    Text k = new Text();
    FlowSort v = new FlowSort();
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        //获取一行数据
        String line = value.toString();
        //切割字符串
        String[] filds = line.split("\t");
        //封装对象
        //封装key
        String phonenumber = filds[0];
        //获取上行流量
        long upflow=Long.parseLong(filds[1]);
        //获取下行流量
        long downflow=Long.parseLong(filds[2]);
        //获取总流量
        long sumflow=Long.parseLong(filds[3]);



        k.set(phonenumber);
        v.setUpFlow(upflow);
        v.setDownFlow(downflow);
        v.setSumFlow(sumflow);

        //写出
        context.write(v,k);

    }
}

  

(3)FlowSortReducer

package flowsort;


import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowSortReducer extends Reducer<FlowSort, Text,Text,FlowSort> {
    @Override
    protected void reduce(FlowSort key, Iterable<Text> values, Context context) throws IOException, InterruptedException {


        for (Text phoneNumber : values) {
            context.write(phoneNumber,key);

        }
    }
}

  

(4)FlowSortdriver

package flowsort;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


import java.io.IOException;

public class FlowSortDrivers {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        //0 封装输入输出路径
        args =new String[]{"C:/Users/input","C:/Users/output"};
        System.setProperty("hadoop.home.dir","E:/hadoop-2.7.2/");
        //获取job对象
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        //设置jar加载路径
        job.setJarByClass(FlowSortDrivers.class);
        //关联mapper和reducer
        job.setMapperClass(FlowSortMapper.class);
        job.setReducerClass(FlowSortReducer.class);
        //设置map最终输出类型
        job.setMapOutputKeyClass(FlowSort.class);
        job.setMapOutputValueClass(Text.class);
        //设置最终输出类型
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowSort.class);
        //设置输入路劲
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        //提交job
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);

    }
}

5.结果

6. 我的error:路径没问题,但是job对象没执行完,ouput没有内容

(1)导错包,这真的是致命

(2)函数名打错。。。。。

哦,希望以后不要犯这些低级错误(小声嘀咕:以后肯定还会再犯的*3)

2020-05-26

21:30:13

FlowSort

猜你喜欢

转载自www.cnblogs.com/cheflone/p/12968805.html