Big Data - MapReduce Applications (3)

Case 1 (score data)

06140411 5 8 102 110 106
06140407 2 6 60 98 80
06140404 10 7 98 31 63
06140403 7 10 105 109 107
06140406 3 3 57 87 92
06140408 10 6 102 102 50
06140402 3 7 54 61 64
06140401 5 3 83 76 111
06140409 5 10 70 56 91
06140412 7 9 22 119 112
06140410 2 1 45 65 80
06140405 3 2 79 20 26

Case 2 (student data: studentName.txt)

06140401 Mr.Deng
06140402 Mr.Li
06140403 Mr.Zhang
06140404 Mr.Zhang
06140405 Mr.Zheng
06140406 Mr.Xie
06140407 Mr.Zhi
06140408 Mr.Guo
06140409 Mr.Zhang
06140410 Mr.Su
06140411 Mr.Wu
06140412 Mr.Yao

Requirement: join the student table into the score table and compute each student's total score.
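For illustration, with the two inputs above each output line should carry the name, the id, the three scores, and the computed total, e.g. (order follows the input file, since the job below runs with zero reducers):

Mr.Wu	06140411	102	110	106	318
Mr.Zhi	06140407	60	98	80	238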

The StudentWritable class

package com.MapJoin.util;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/*
 * @author Administrator
 * @version 1.0
 * @task Join the student table into the score table and compute each student's total score
 */
public class StudentWritable implements Writable {
    //student id
    private String id;
    //Chinese score
    private int chinese;
    //math score
    private int math;
    //English score
    private int english;
    //total score
    private int sum;

    //no-arg constructor, required so Hadoop can instantiate the class during deserialization
    public StudentWritable(){}

    //convenience constructor; the total is derived from the three scores
    public StudentWritable(String id, int chinese, int math, int english){
        this.id = id;
        this.chinese = chinese;
        this.math = math;
        this.english = english;
        this.sum = chinese + math + english;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(id);
        dataOutput.writeInt(chinese);
        dataOutput.writeInt(math);
        dataOutput.writeInt(english);
        dataOutput.writeInt(sum);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        id = dataInput.readUTF();
        chinese = dataInput.readInt();
        math = dataInput.readInt();
        english = dataInput.readInt();
        sum = dataInput.readInt();
    }

    @Override
    public String toString() {
        return id+"\t"+chinese+"\t"+math+"\t"+english+"\t"+sum;
    }
}
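As a quick sanity check — a minimal, hypothetical round-trip (not part of the original job) showing that write() and readFields() mirror each other, which is the one invariant a Writable must maintain:

import java.io.*;

public class StudentWritableDemo {
    public static void main(String[] args) throws IOException {
        //serialize a record into an in-memory buffer
        StudentWritable original = new StudentWritable("06140401", 83, 76, 111);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        //read it back through readFields(); prints 06140401	83	76	111	270
        StudentWritable restored = new StudentWritable();
        restored.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(restored);
    }
}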

The MapJoinMapper class

package com.MapJoin.util;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.*;
import java.util.HashMap;
import java.util.Map;

/*
 * @author Administrator
 * @version 1.0
 * @task Join the student table into the score table and compute each student's total score
 */
public class MapJoinMapper extends Mapper<LongWritable, Text, Text, StudentWritable> {
    Map<String,String> map = new HashMap<>();
    @Override
    protected void setup(Context context) throws IOException {
        //load the lookup file that maps student id -> name
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream("F:/IdeaProjects/studentName.txt"),"UTF-8"));

        String line;

        //stops at end of file (readLine() returns null) or at the first empty line
        while (StringUtils.isNotEmpty(line = bufferedReader.readLine())){
            //split into [id, name] and cache the mapping
            String[] field = line.split("\t");
            map.put(field[0],field[1]);
        }

        bufferedReader.close();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //split the score line
        String[] field = value.toString().split("\t");
        //student id is the first column
        String id = field[0];
        //look up the student's name from the cached table
        String name = map.get(id);
        //Chinese score (the two columns after the id in the sample data are not used)
        int chinese = Integer.parseInt(field[3]);
        //math score
        int math = Integer.parseInt(field[4]);
        //English score
        int english = Integer.parseInt(field[5]);
        //emit the name as the key and the joined record as the value
        context.write(new Text(name),new StudentWritable(id,chinese,math,english));
    }
}
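Two caveats. First, the lookup file is read from a hard-coded local path, so this only runs as-is in local mode; a distributed-cache variant is sketched after the driver below. Second, map.get(id) returns null when an id is missing from studentName.txt, and new Text(null) throws a NullPointerException — the code assumes every id in the score file has a matching student record.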

The MapJoinDriver class

package com.MapJoin.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.net.URISyntaxException;

/*
 * @author Administrator
 * @version 1.0
 * @task Join the student table into the score table and compute each student's total score
 */
public class MapJoinDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
        //instantiate the Configuration object
        Configuration conf = new Configuration();
        //create the job
        Job job = Job.getInstance(conf);
        //locate the jar via the driver class
        job.setJarByClass(MapJoinDriver.class);
        //attach the Mapper class
        job.setMapperClass(MapJoinMapper.class);
        //set the Mapper's output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(StudentWritable.class);
        //set the input path
        FileInputFormat.setInputPaths(job,new Path(args[0]));
        //set the output path
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        //map-side join: zero reduce tasks
        job.setNumReduceTasks(0);
        //submit the job and wait for completion
        boolean b = job.waitForCompletion(true);
        System.exit(b?0:1);
    }
}
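The unused URISyntaxException in main's throws clause hints at the usual cluster-side setup for a map join: shipping the lookup file with the job through the distributed cache rather than reading a hard-coded local path. A minimal sketch, assuming studentName.txt has been uploaded to HDFS at a hypothetical location (this also requires import java.net.URI):

//in the driver, before submitting the job:
job.addCacheFile(new URI("hdfs:///cache/studentName.txt"));

//in MapJoinMapper.setup(): the cached file is localized into the task's
//working directory under its base name, so it can be opened relatively:
BufferedReader bufferedReader = new BufferedReader(
        new InputStreamReader(new FileInputStream("studentName.txt"), "UTF-8"));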

Case 1 (score data: grade.txt)

06140411 102 110 106
06140407 60 98 80
06140404 98 31 63
06140403 105 109 107
06140406 57 87 92
06140408 102 102 50
06140402 54 61 64
06140401 83 76 111
06140409 70 56 91
06140412 22 119 112
06140410 45 65 80
06140405 79 20 26

Case 2 (student data)

06140401 Mr.Deng
06140402 Mr.Li
06140403 Mr.Zhang
06140404 Mr.Zhang
06140405 Mr.Zheng
06140406 Mr.Xie
06140407 Mr.Zhi
06140408 Mr.Guo
06140409 Mr.Zhang
06140410 Mr.Su
06140411 Mr.Wu
06140412 Mr.Yao

Requirement: sort by student id in ascending order, join the student table into the score table, and compute each student's total score.
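For illustration, with the inputs above each joined output line should carry the id, the name, the three scores, and the total, with ids ascending because the shuffle sorts the Text keys:

06140401	Mr.Deng	83	76	111	270
06140402	Mr.Li	54	61	64	179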

The StudentWritable class

package com.ReduceJoin.util;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/*
 * @author Administrator
 * @version 1.0
 * @task Sort by student id in ascending order, join the student table into the score table, and compute each student's total score
 */
public class StudentWritable implements Writable {
    //student name
    private String name;
    //student id
    private String id;
    //Chinese score
    private int chinese;
    //math score
    private int math;
    //English score
    private int english;
    //total score
    private int sum;
    //source flag: 0 = score record, 1 = student record
    private int flag;

    public StudentWritable() {
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public int getChinese() {
        return chinese;
    }

    public void setChinese(int chinese) {
        this.chinese = chinese;
    }

    public int getMath() {
        return math;
    }

    public void setMath(int math) {
        this.math = math;
    }

    public int getEnglish() {
        return english;
    }

    public void setEnglish(int english) {
        this.english = english;
    }

    public int getSum() {
        return sum;
    }

    //derives the total from the three scores (not a standard bean setter,
    //so BeanUtils.copyProperties will not copy sum; the reducer recomputes it)
    public void setSum(int chinese, int math, int english) {
        this.sum = chinese+math+english;
    }

    public int getFlag() {
        return flag;
    }

    public void setFlag(int flag) {
        this.flag = flag;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(id);
        dataOutput.writeUTF(name);
        dataOutput.writeInt(chinese);
        dataOutput.writeInt(math);
        dataOutput.writeInt(english);
        dataOutput.writeInt(sum);
        dataOutput.writeInt(flag);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        id = dataInput.readUTF();
        name = dataInput.readUTF();
        chinese = dataInput.readInt();
        math = dataInput.readInt();
        english = dataInput.readInt();
        sum = dataInput.readInt();
        flag = dataInput.readInt();
    }

    @Override
    public String toString() {
        return id+"\t"+name+"\t"+chinese+"\t"+math+"\t"+english+"\t"+sum;
    }
}
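Note that write() unconditionally serializes all seven fields, and DataOutput.writeUTF(null) throws a NullPointerException. This is why the mapper below fills the name with a single-space placeholder on score records and zeroes the scores on student records: every field must hold a serializable value before the record leaves the map side.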

The ReduceJoinMapper class

package com.ReduceJoin.util;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/*
 * @author Administrator
 * @version 1.0
 * @task Sort by student id in ascending order, join the student table into the score table, and compute each student's total score
 */
public class ReduceJoinMapper extends Mapper<LongWritable, Text, Text, StudentWritable> {
    @Override
    protected void map(LongWritable num, Text text, Context context) throws IOException, InterruptedException {
        //get the split this record came from
        FileSplit inputSplit = (FileSplit) context.getInputSplit();

        //get the name of the file being read
        String fileName = inputSplit.getPath().getName();

        //instantiate the output value
        StudentWritable value = new StudentWritable();

        //instantiate the output key
        Text key = new Text();

        //get the line of input
        String line = text.toString();

        //tell the two inputs apart by file name
        if ("grade.txt".equals(fileName)){
            //split the score line
            String[] field = line.split("\t");

            //key on the student id
            key.set(field[0]);
            //flag 0 marks a score record
            value.setFlag(0);
            //set the student id
            value.setId(field[0]);
            //placeholder name (write() cannot serialize a null string)
            value.setName(" ");
            //set the Chinese score
            value.setChinese(Integer.parseInt(field[1]));
            //set the math score
            value.setMath(Integer.parseInt(field[2]));
            //set the English score
            value.setEnglish(Integer.parseInt(field[3]));
            //set the total score
            value.setSum(Integer.parseInt(field[1]),Integer.parseInt(field[2]),Integer.parseInt(field[3]));
        }else {
            //split the student line
            String[] field = line.split("\t");

            //key on the student id
            key.set(field[0]);
            //flag 1 marks a student record
            value.setFlag(1);
            //set the student id
            value.setId(field[0]);
            //set the student's name
            value.setName(field[1]);
            //scores are unknown on this side; zero them out
            value.setChinese(0);
            value.setMath(0);
            value.setEnglish(0);
            value.setSum(0,0,0);
        }

        //emit to the Reducer
        context.write(key,value);
    }
}
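The branch hinges on the score file literally being named grade.txt; every other file under the input path is treated as the student table. Both files therefore have to sit together in the directory passed as args[0].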

The ReduceJoinReducer class

package com.ReduceJoin.util;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;

/*
 * @author Administrator
 * @version 1.0
 * @task Sort by student id in ascending order, join the student table into the score table, and compute each student's total score
 */
public class ReduceJoinReducer extends Reducer<Text, StudentWritable, StudentWritable, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<StudentWritable> values, Context context) throws IOException, InterruptedException {
        //list holding the score records for this id
        ArrayList<StudentWritable> arrayList = new ArrayList<StudentWritable>();

        //holds the matching student record
        StudentWritable studentSheet = new StudentWritable();

        for (StudentWritable v: values){
            if (0 == v.getFlag()){
                //temporary object for the score record; Hadoop reuses the
                //instance behind v, so it must be copied before being stored
                StudentWritable gradeSheet = new StudentWritable();

                //copy the data
                try {
                    BeanUtils.copyProperties(gradeSheet, v);
                } catch (IllegalAccessException | InvocationTargetException e) {
                    e.printStackTrace();
                }

                //add the copy to the list
                arrayList.add(gradeSheet);
            }else if (1 == v.getFlag()){
                try {
                    BeanUtils.copyProperties(studentSheet, v);
                } catch (IllegalAccessException | InvocationTargetException e) {
                    e.printStackTrace();
                }
            }
        }
        for (StudentWritable array: arrayList){
            //fill in the name from the student record and recompute the total
            array.setName(studentSheet.getName());
            array.setSum(array.getChinese(),array.getMath(),array.getEnglish());
            //emit the joined record
            context.write(array,NullWritable.get());
        }
    }
}
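The BeanUtils.copyProperties calls (destination first, source second) are not decorative: Hadoop reuses a single StudentWritable instance behind the values iterator, so adding v to the list directly would leave arrayList holding multiple references to whatever the last record was. One subtlety: setSum(int, int, int) is not a standard bean setter, so copyProperties never copies sum — harmless only because the output loop recomputes it via array.setSum(...).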

The ReduceJoinDriver class

package com.ReduceJoin.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/*
 * @author Administrator
 * @version 1.0
 * @task Sort by student id in ascending order, join the student table into the score table, and compute each student's total score
 */
public class ReduceJoinDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        //instantiate the Configuration object
        Configuration conf = new Configuration();
        //create the job
        Job job = Job.getInstance(conf);
        //locate the jar via the driver class
        job.setJarByClass(ReduceJoinDriver.class);
        //attach the Mapper class
        job.setMapperClass(ReduceJoinMapper.class);
        //attach the Reducer class
        job.setReducerClass(ReduceJoinReducer.class);
        //set the Mapper's output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(StudentWritable.class);
        //set the Reducer's output key/value types
        job.setOutputKeyClass(StudentWritable.class);
        job.setOutputValueClass(NullWritable.class);
        //set the input path
        FileInputFormat.setInputPaths(job,new Path(args[0]));
        //set the output path
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        //submit the job and wait for completion
        boolean b = job.waitForCompletion(true);
        System.exit(b?0:1);
    }
}
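A typical invocation, with hypothetical paths (the input directory must contain grade.txt alongside the student file, and the output directory must not exist yet):

hadoop jar ReduceJoin.jar com.ReduceJoin.util.ReduceJoinDriver /input/join /output/join

Because the student ids are the map output keys, the shuffle hands them to the reducer in ascending order, which is what satisfies the sorting half of the requirement.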


Reposted from blog.csdn.net/JavaDestiny/article/details/88855791