免费视频教程 https://www.51doit.com/ 或者联系博主微信 17710299606
1 数据
1.1 user.txt
u001,hls,22,fengjie
u002,wangwu,31,lisi
u003,zhangyanru,22,tananpengyou
u004,laocao,26,fengyi
u005,mengqi,12,nvmengqi
u006,haolei,38,sb
u007,wanghongjing,24,wife
u009,wanghongjing,24,wife
1.2 orders.txt
order011,u001,300
order012,u002,200
order023,u006,100
order056,u007,300
order066,u003,500
order055,u004,300
order021,u005,300
order014,u001,100
order025,u005,300
order046,u007,30
order067,u003,340
order098,u008,310
2 需求和实现
使用MapReduce实现SQL语法中的三种连接: join(内连接)、left join(左连接)、right join(右连接)
2.1 pojo类
package com._51doit.pojo;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.file.Watchable;
/**
 * Value object shuttled from the mapper to the reducer for the join.
 *
 * <p>One instance carries either a user record or an order record; {@code tbName}
 * says which. Fields that do not belong to the record kind are expected to hold a
 * neutral value (empty string or zero).
 *
 * <p>Serialization is null-safe: a {@code null} String field is written as the
 * empty string, because {@link DataOutput#writeUTF(String)} rejects {@code null}.
 * After a write/read round trip such a field therefore reads back as {@code ""}.
 *
 * Author: 多易教育-行哥
 * Date: 2020/7/12
 */
public class JoinBean implements Writable {
    /** User id; present on both user and order records (the join key). */
    private String uid;
    /** User name (user records only). */
    private String name;
    /** User age (user records only). */
    private int age;
    /** User's friend (user records only). */
    private String friend;
    /** Order id (order records only). */
    private String oid;
    /** Order amount (order records only). */
    private double money;
    /** Record-kind marker telling which table this bean came from. */
    private String tbName;

    /** No-arg constructor; leaves every field at its Java default. */
    public JoinBean() {
    }

    /**
     * Creates a fully populated bean.
     *
     * @param uid    user id
     * @param name   user name
     * @param age    user age
     * @param friend user's friend
     * @param oid    order id
     * @param money  order amount
     * @param tbName record-kind marker
     */
    public JoinBean(String uid, String name, int age, String friend, String oid, double money, String tbName) {
        this.uid = uid;
        this.name = name;
        this.age = age;
        this.friend = friend;
        this.oid = oid;
        this.money = money;
        this.tbName = tbName;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }

    public String getFriend() {
        return friend;
    }

    public void setFriend(String friend) {
        this.friend = friend;
    }

    public String getOid() {
        return oid;
    }

    public void setOid(String oid) {
        this.oid = oid;
    }

    public double getMoney() {
        return money;
    }

    public void setMoney(double money) {
        this.money = money;
    }

    public String getTbName() {
        return tbName;
    }

    public void setTbName(String tbName) {
        this.tbName = tbName;
    }

    /**
     * Renders only the order columns, in the form {@code oid,uid,money}.
     *
     * @return the comma-separated order columns
     */
    @Override
    public String toString() {
        return this.oid + "," + this.uid + "," + this.money;
    }

    /**
     * Serializes all fields in a fixed order; {@link #readFields(DataInput)} reads
     * them back in the same order.
     *
     * @param dataOutput sink to write to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(nullToEmpty(uid));
        dataOutput.writeUTF(nullToEmpty(name));
        dataOutput.writeInt(age);
        dataOutput.writeUTF(nullToEmpty(friend));
        dataOutput.writeUTF(nullToEmpty(oid));
        dataOutput.writeDouble(money);
        dataOutput.writeUTF(nullToEmpty(tbName));
    }

    /**
     * Restores all fields, in the order {@link #write(DataOutput)} emitted them.
     *
     * @param dataInput source to read from
     * @throws IOException if the underlying stream fails or ends early
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.uid = dataInput.readUTF();
        this.name = dataInput.readUTF();
        this.age = dataInput.readInt();
        this.friend = dataInput.readUTF();
        this.oid = dataInput.readUTF();
        this.money = dataInput.readDouble();
        this.tbName = dataInput.readUTF();
    }

    /** Maps {@code null} to {@code ""} so that writeUTF never receives null. */
    private static String nullToEmpty(String s) {
        return s == null ? "" : s;
    }
}
2.2 Mapper类
知识点 : 获取处理文件的文件名
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    // The input split is cast to FileSplit to reach the path of the file being
    // read; only the last path component (the file name) is remembered.
    fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
}
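JoinBean中的每个属性都必须赋予与其数据类型一致的非null值: 没有数据的字符串字段用""补充, 数值字段用0补充。不要使用null来补充没有数据的字段, 因为DataOutput.writeUTF不接受null(传入null会抛出NullPointerException)。例如: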
// User record: the order columns are filled with "" and 0d instead of null.
JoinBean user = new JoinBean(
        split[0],                      // uid
        split[1],                      // name
        Integer.parseInt(split[2]),    // age
        split[3],                      // friend
        "",                            // oid (no data)
        0d,                            // money (no data)
        "user");
// Order record: the user columns are filled with "" and 0 instead of null.
JoinBean orders = new JoinBean(
        split[1],                      // uid
        "",                            // name (no data)
        0,                             // age (no data)
        "",                            // friend (no data)
        split[0],                      // oid
        Double.parseDouble(split[2]),  // money
        "orders");
/**
 * Tags every input line as a user or an order record and emits it keyed by user id.
 *
 * <p>Files whose name starts with {@code "user"} are parsed as
 * {@code uid,name,age,friend}; every other file is parsed as
 * {@code oid,uid,money}. Blank lines are skipped. Lines with too few fields or a
 * non-numeric age/amount are skipped and counted in the
 * {@code JoinMapper/MALFORMED_RECORDS} counter instead of failing the task.
 */
static class JoinMapper extends Mapper<LongWritable, Text, Text, JoinBean> {
    /** Number of comma-separated fields in a user line. */
    private static final int USER_FIELDS = 4;
    /** Number of comma-separated fields in an order line. */
    private static final int ORDER_FIELDS = 3;

    /** Name of the file this map task reads; set in {@link #setup}. */
    String fileName = null;
    /** Reused output key, to avoid allocating a Text per record. */
    Text k = new Text();

    /**
     * Captures the name of the file being processed. Runs once per map task.
     *
     * @param context task context providing the input split
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        FileSplit f = (FileSplit) context.getInputSplit();
        fileName = f.getPath().getName();
    }

    /**
     * Parses one line into a {@link JoinBean} and writes it under its user id.
     *
     * @param key     position key supplied by the input format (unused)
     * @param value   one line of text
     * @param context sink for the (uid, bean) pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        if (line.trim().isEmpty()) {
            return; // blank line: nothing to join
        }
        // Limit -1 keeps trailing empty fields, e.g. a user with an empty friend column.
        String[] split = line.split(",", -1);
        boolean isUser = fileName.startsWith("user");
        if (split.length < (isUser ? USER_FIELDS : ORDER_FIELDS)) {
            countMalformed(context);
            return;
        }

        JoinBean bean;
        try {
            if (isUser) {
                // u001,hls,22,fengjie
                bean = new JoinBean(split[0], split[1], Integer.parseInt(split[2]), split[3], "", 0d, "user");
            } else {
                // order011,u001,300
                bean = new JoinBean(split[1], "", 0, "", split[0], Double.parseDouble(split[2]), "orders");
            }
        } catch (NumberFormatException e) {
            // Non-numeric age or amount: skip the record but keep it visible via the counter.
            countMalformed(context);
            return;
        }
        k.set(bean.getUid());
        context.write(k, bean);
    }

    /** Increments the counter that tracks skipped, unparseable input lines. */
    private void countMalformed(Context context) {
        context.getCounter("JoinMapper", "MALFORMED_RECORDS").increment(1L);
    }
}
2.3 Reducer类
/**
 * Joins the user record and the order records that share one user id.
 *
 * <p>The join flavour is read from the job configuration key {@code join.type}:
 * <ul>
 *   <li>{@code inner} (default) - emit an order only when its user exists;</li>
 *   <li>{@code left} - emit every order; missing user columns are rendered as
 *       {@code null};</li>
 *   <li>{@code right} - the inner-join rows plus every user without orders; the
 *       missing order id and amount are rendered as {@code null}.</li>
 * </ul>
 * Each output line is {@code oid,uid,money,name,age,friend}.
 */
static class JoinReducer extends Reducer<Text, JoinBean, Text, NullWritable> {
    /** Configuration key that selects the join flavour. */
    static final String JOIN_TYPE_KEY = "join.type";
    private static final String INNER = "inner";
    private static final String LEFT = "left";
    private static final String RIGHT = "right";

    /** Reused output key. */
    Text k = new Text();
    /** Join flavour for this task; resolved once in {@link #setup}. */
    private String joinType = INNER;

    /**
     * Reads and validates the join flavour.
     *
     * @param context task context providing the job configuration
     * @throws IllegalArgumentException if the configured value is not inner, left or right
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        String configured = context.getConfiguration().get(JOIN_TYPE_KEY, INNER).trim();
        boolean known = INNER.equalsIgnoreCase(configured)
                || LEFT.equalsIgnoreCase(configured)
                || RIGHT.equalsIgnoreCase(configured);
        if (!known) {
            throw new IllegalArgumentException(
                    JOIN_TYPE_KEY + " must be inner, left or right but was: " + configured);
        }
        joinType = configured;
    }

    /**
     * Emits the joined rows for one user id.
     *
     * @param key     the user id
     * @param values  the user record (if any) and the order records for that id
     * @param context sink for the joined lines
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<JoinBean> values, Context context) throws IOException, InterruptedException {
        JoinBean user = null;
        List<JoinBean> ordersList = new ArrayList<JoinBean>();
        for (JoinBean jb : values) {
            // The framework reuses the value object between iterations, so each
            // record must be copied before it is kept.
            JoinBean copy = new JoinBean(jb.getUid(), jb.getName(), jb.getAge(), jb.getFriend(),
                    jb.getOid(), jb.getMoney(), jb.getTbName());
            if ("user".equals(copy.getTbName())) {
                user = copy;
            } else {
                ordersList.add(copy);
            }
        }

        boolean keepUnmatchedOrders = LEFT.equalsIgnoreCase(joinType);
        boolean keepUnmatchedUsers = RIGHT.equalsIgnoreCase(joinType);

        for (JoinBean order : ordersList) {
            if (user != null || keepUnmatchedOrders) {
                k.set(order.toString() + "," + userColumns(user));
                context.write(k, NullWritable.get());
            }
        }

        if (keepUnmatchedUsers && user != null && ordersList.isEmpty()) {
            // A user with no orders: the order id and amount columns are NULL.
            k.set("null," + user.getUid() + ",null," + userColumns(user));
            context.write(k, NullWritable.get());
        }
    }

    /** Renders {@code name,age,friend}, or three {@code null} columns when there is no user. */
    private static String userColumns(JoinBean user) {
        if (user == null) {
            return "null,null,null";
        }
        return user.getName() + "," + user.getAge() + "," + user.getFriend();
    }
}
2.4 启动类
/**
 * Configures and runs the join job, then exits with the job's status.
 *
 * @param args optional: {@code args[0]} is the input directory and {@code args[1]}
 *             the output directory; when omitted, the original hard-coded
 *             {@code D:\data\join\input} and {@code D:\data\join\res2} are used
 * @throws Exception if the job cannot be created, submitted or monitored
 */
public static void main(String[] args) throws Exception {
    Logger.getLogger("org").setLevel(Level.ERROR);
    Configuration conf = new Configuration();
    // Second argument is the job name; the class literal avoids creating an instance.
    Job job = Job.getInstance(conf, LineDemo.class.getSimpleName());
    job.setMapperClass(JoinMapper.class);
    job.setReducerClass(JoinReducer.class);
    // Map-phase output types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(JoinBean.class);
    // Final output types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    // job.setNumReduceTasks(2); // uncomment to run 2 reduce tasks
    // Input and output locations
    String inputDir = args.length > 0 ? args[0] : "D:\\data\\join\\input";
    String outputDir = args.length > 1 ? args[1] : "D:\\data\\join\\res2";
    FileInputFormat.setInputPaths(job, new Path(inputDir));
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    // Propagate failure to the caller instead of always exiting with 0.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}