hadoop详细笔记(十三) mapreduce数据分析案例之实现SQL JOIN语法

免费视频教程 https://www.51doit.com/ 或者联系博主微信 17710299606

1 数据

1.1  user.txt

u001,hls,22,fengjie
u002,wangwu,31,lisi
u003,zhangyanru,22,tananpengyou
u004,laocao,26,fengyi
u005,mengqi,12,nvmengqi
u006,haolei,38,sb
u007,wanghongjing,24,wife
u009,wanghongjing,24,wife

1.2 orders.txt

order011,u001,300
order012,u002,200
order023,u006,100
order056,u007,300
order066,u003,500
order055,u004,300
order021,u005,300
order014,u001,100
order025,u005,300
order046,u007,30
order067,u003,340
order098,u008,310

2 需求和实现

用 MapReduce 实现 SQL 语法中的 join、left join 和 right join (以订单表 orders 为左表, 用户表 user 为右表, 关联字段为 uid)。

2.1 pojo类

package com._51doit.pojo;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.file.Watchable;

/**
 * Author:   51doit (Duoyi Education) - Xingge
 * Date:     2020/7/12
 * Description:
 * A single Hadoop-serializable record that holds either one order or one user.
 * The {@code tbName} field tells which of the two a given instance represents;
 * the fields belonging to the other kind are expected to carry neutral values
 * (empty string / zero).
 */
public class JoinBean implements Writable {
    /** User-side fields. */
    private String uid;
    private String name;
    private int age;
    private String friend;

    /** Order-side fields. */
    private String oid;
    private double money;

    /** Source-table marker written by the producer of the record. */
    private String tbName;

    /** No-arg constructor; leaves every field at its Java default. */
    public JoinBean() {
    }

    /**
     * Creates a fully populated bean.
     *
     * @param uid    user id (the join key)
     * @param name   user name
     * @param age    user age
     * @param friend user's friend
     * @param oid    order id
     * @param money  order amount
     * @param tbName source-table marker
     */
    public JoinBean(String uid, String name, int age, String friend, String oid, double money, String tbName) {
        this.uid = uid;
        this.name = name;
        this.age = age;
        this.friend = friend;
        this.oid = oid;
        this.money = money;
        this.tbName = tbName;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public String getFriend() {
        return friend;
    }

    public void setFriend(String friend) {
        this.friend = friend;
    }

    public String getOid() {
        return oid;
    }

    public void setOid(String oid) {
        this.oid = oid;
    }

    public double getMoney() {
        return money;
    }

    public void setMoney(double money) {
        this.money = money;
    }

    public String getTbName() {
        return tbName;
    }

    public void setTbName(String tbName) {
        this.tbName = tbName;
    }

    /**
     * Renders only the order-side columns, in the order {@code oid,uid,money}.
     */
    @Override
    public String toString() {
        return this.oid + "," + this.uid + "," + this.money;
    }

    /**
     * Serializes all fields in a fixed order that {@link #readFields(DataInput)} mirrors.
     * A {@code null} string field is written as the empty string, because
     * {@link DataOutput#writeUTF(String)} throws {@link NullPointerException} on {@code null}.
     *
     * @param dataOutput sink to write the fields to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(nullToEmpty(uid));
        dataOutput.writeUTF(nullToEmpty(name));
        dataOutput.writeInt(age);
        dataOutput.writeUTF(nullToEmpty(friend));
        dataOutput.writeUTF(nullToEmpty(oid));
        dataOutput.writeDouble(money);
        dataOutput.writeUTF(nullToEmpty(tbName));
    }

    /**
     * Restores all fields; the read order must match the write order exactly.
     *
     * @param dataInput source to read the fields from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.uid = dataInput.readUTF();
        this.name = dataInput.readUTF();
        this.age = dataInput.readInt();
        this.friend = dataInput.readUTF();
        this.oid = dataInput.readUTF();
        this.money = dataInput.readDouble();
        this.tbName = dataInput.readUTF();
    }

    /** Maps {@code null} to {@code ""} so the value is safe to pass to {@code writeUTF}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}

2.2 Mapper类

知识点 : 获取处理文件的文件名

 @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit f = (FileSplit) context.getInputSplit();
            fileName = f.getPath().getName();
        }

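JoinBean 同时承载用户和订单两种记录, 序列化(write)与反序列化(readFields)时字段的顺序和数据类型必须保持一致; 对于没有数据的字段, 请用空字符串 "" 或 0 补齐, 不要使用 null, 因为 DataOutput.writeUTF 不接受 null (会抛出 NullPointerException)。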

// Example: a user record. The order-only fields (oid, money) are filled with "" and 0d rather than null.
JoinBean user = new JoinBean(split[0], split[1], Integer.parseInt(split[2]), split[3], "", 0d, "user");

 // Example: an order record. The user-only fields (name, age, friend) are filled with "", 0 and "" rather than null.
 JoinBean orders = new JoinBean(split[1], "", 0, "", split[0], Double.parseDouble(split[2]), "orders");

 /**
  * Tags every input line with its source table and emits it keyed by user id,
  * so that a user and all of that user's orders meet in the same reduce call.
  * Lines from files whose name starts with "user" are parsed as
  * {@code uid,name,age,friend}; lines from any other file are parsed as
  * {@code oid,uid,money}.
  */
 static class JoinMapper extends Mapper<LongWritable, Text, Text, JoinBean> {
        /** Minimum number of comma-separated fields in a user line. */
        private static final int USER_FIELDS = 4;
        /** Minimum number of comma-separated fields in an order line. */
        private static final int ORDER_FIELDS = 3;

        /** Name of the file behind this task's input split; assigned in {@link #setup}. */
        String fileName = null;

        /** Output key instance reused across map() calls. */
        Text k = new Text();

        /**
         * Captures the input file name. This method runs only once per map task.
         *
         * @param context task context supplying the input split
         * @throws IOException          declared by the overridden method
         * @throws InterruptedException declared by the overridden method
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit f = (FileSplit) context.getInputSplit();
            fileName = f.getPath().getName();
        }

        /**
         * Converts one text line into a {@link JoinBean} and writes it under its uid.
         * Blank lines are skipped; lines with too few fields are skipped and counted
         * in the "JoinMapper"/"MALFORMED_RECORDS" counter instead of failing the task
         * with an ArrayIndexOutOfBoundsException. Unparseable numbers still fail fast.
         *
         * @param key     byte offset of the line (unused)
         * @param value   the raw line, either a user row or an order row
         * @param context sink for the (uid, bean) pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            if (line.trim().isEmpty()) {
                return; // nothing to join on
            }
            String[] split = line.split(",");
            // The file name (e.g. user.txt) decides how the line is interpreted.
            boolean isUser = fileName.startsWith("user");
            if (split.length < (isUser ? USER_FIELDS : ORDER_FIELDS)) {
                context.getCounter("JoinMapper", "MALFORMED_RECORDS").increment(1);
                return;
            }

            JoinBean bean;
            if (isUser) {
                // e.g. u001,hls,22,fengjie -- order-side fields get neutral values, never null
                bean = new JoinBean(split[0], split[1], Integer.parseInt(split[2]), split[3], "", 0d, "user");
            } else {
                // e.g. order011,u001,300 -- user-side fields get neutral values, never null
                bean = new JoinBean(split[1], "", 0, "", split[0], Double.parseDouble(split[2]), "orders");
            }
            k.set(bean.getUid());
            context.write(k, bean);
        }
    }

2.3 Reducer类 

/**
 * Joins the user record and the order records that share one uid.
 * Orders are treated as the left table and users as the right table.
 * The join flavour is chosen with the job configuration key {@code join.type}:
 * <ul>
 *   <li>{@code join} (default) - only orders that have a matching user;</li>
 *   <li>{@code left} - every order; user columns are "null" when no user matches;</li>
 *   <li>{@code right} - every user; order columns are "null" when the user has no order.</li>
 * </ul>
 * Each output row is {@code oid,uid,money,name,age,friend}.
 */
static class JoinReducer extends Reducer<Text, JoinBean, Text, NullWritable> {
        /** Configuration key that selects the join flavour. */
        static final String JOIN_TYPE_KEY = "join.type";
        static final String JOIN_INNER = "join";
        static final String JOIN_LEFT = "left";
        static final String JOIN_RIGHT = "right";

        /** Text printed for a column that has no value on the unmatched side. */
        private static final String NULL_COLUMN = "null";

        /** Output key instance reused for every emitted row. */
        Text k = new Text();

        private String joinType = JOIN_INNER;

        /**
         * Reads and validates the requested join flavour.
         *
         * @param context task context giving access to the job configuration
         * @throws IllegalArgumentException if {@code join.type} is not join, left or right
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            String configured = context.getConfiguration().get(JOIN_TYPE_KEY, JOIN_INNER);
            boolean known = JOIN_INNER.equals(configured)
                    || JOIN_LEFT.equals(configured)
                    || JOIN_RIGHT.equals(configured);
            if (!known) {
                throw new IllegalArgumentException(
                        "Unsupported " + JOIN_TYPE_KEY + " value: " + configured);
            }
            joinType = configured;
        }

        /**
         * Separates the values into the user and its orders, then emits the joined rows.
         *
         * @param key     the uid shared by all values
         * @param values  the user record (if any) and the order records (if any) for this uid
         * @param context sink for the joined rows
         */
        @Override
        protected void reduce(Text key, Iterable<JoinBean> values, Context context) throws IOException, InterruptedException {
            JoinBean user = null; // stays null when no user record exists for this uid
            List<JoinBean> ordersList = new ArrayList<JoinBean>();
            for (JoinBean jb : values) {
                // Copy before keeping a reference: Hadoop may reuse the value instance between iterations.
                JoinBean copy = copyOf(jb);
                if ("user".equals(jb.getTbName())) {
                    user = copy;
                } else {
                    ordersList.add(copy);
                }
            }

            if (user == null) {
                // Orders without a user survive only in a left join.
                if (JOIN_LEFT.equals(joinType)) {
                    for (JoinBean order : ordersList) {
                        emit(context, order.toString() + "," + userColumns(null));
                    }
                }
                return;
            }

            if (ordersList.isEmpty()) {
                // A user without orders survives only in a right join.
                if (JOIN_RIGHT.equals(joinType)) {
                    emit(context, NULL_COLUMN + "," + user.getUid() + "," + NULL_COLUMN + "," + userColumns(user));
                }
                return;
            }

            // Matched rows are part of every join flavour.
            for (JoinBean order : ordersList) {
                emit(context, order.toString() + "," + userColumns(user));
            }
        }

        /** Writes one output row using the shared key instance. */
        private void emit(Context context, String row) throws IOException, InterruptedException {
            k.set(row);
            context.write(k, NullWritable.get());
        }

        /** Formats {@code name,age,friend}, or three "null" columns when there is no user. */
        private static String userColumns(JoinBean user) {
            if (user == null) {
                return NULL_COLUMN + "," + NULL_COLUMN + "," + NULL_COLUMN;
            }
            return user.getName() + "," + user.getAge() + "," + user.getFriend();
        }

        /** Returns an independent copy of the given bean. */
        private static JoinBean copyOf(JoinBean src) {
            return new JoinBean(src.getUid(), src.getName(), src.getAge(), src.getFriend(),
                    src.getOid(), src.getMoney(), src.getTbName());
        }
    }

2.4 启动类

 /**
  * Configures and runs the join job.
  *
  * @param args optional: {@code args[0]} overrides the input directory and
  *             {@code args[1]} overrides the output directory; without
  *             arguments the built-in local paths are used
  * @throws Exception if the job cannot be set up or submitted
  */
 public static void main(String[] args) throws Exception {
        Logger.getLogger("org").setLevel(Level.ERROR);
        Configuration conf = new Configuration();
        // Second argument is the job name; the class literal avoids creating an instance just to read its name.
        Job job = Job.getInstance(conf, LineDemo.class.getSimpleName());
        // Lets Hadoop locate the jar that contains the job classes when run on a cluster.
        job.setJarByClass(LineDemo.class);

        job.setMapperClass(JoinMapper.class);
        job.setReducerClass(JoinReducer.class);
        // Output types of the map phase
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(JoinBean.class);
        // Output types of the final result
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // job.setNumReduceTasks(2);  // uncomment to run two reduce tasks
        // Location of the data to process and of the result
        String inputDir = args.length > 0 ? args[0] : "D:\\data\\join\\input";
        String outputDir = args.length > 1 ? args[1] : "D:\\data\\join\\res2";
        FileInputFormat.setInputPaths(job, new Path(inputDir));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));
        // Surface a failed job as a non-zero exit status instead of ignoring the result.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }

猜你喜欢

转载自blog.csdn.net/qq_37933018/article/details/107297870
今日推荐