// ReduceJoin: reduce-side join of the order detail table with the item info table on item_id.
package a.b.c;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/*
Order detail table: detail
order_id item_id amount
12,sp001,2
12,sp002,4
12,sp003,3
13,sp001,2
13,sp002,4
Item info table: iteminfo
item_id item_type
sp001,type001
sp002,type002
sp003,type002
*/
// Joins the item table with the order table on item_id.
// Records from each input file are tagged with a flag and wrapped in a Joinbean.
/**
 * Join record carrying the union of fields from the order detail table and the
 * item info table. The {@code flag} field marks the record's origin:
 * "0" = detail (order) record, "1" = iteminfo (item type) record. Fields not
 * present in the source record are left as the empty string by the mapper.
 */
class Joinbean implements WritableComparable<Joinbean> {
    private String order_id;
    private String item_id;
    private String amount;
    private String item_type;
    private String flag;

    /** No-arg constructor required by Hadoop's Writable deserialization. */
    public Joinbean() {
    }

    public Joinbean(String order_id, String item_id, String amount, String item_type, String flag) {
        this.order_id = order_id;
        this.item_id = item_id;
        this.amount = amount;
        this.item_type = item_type;
        this.flag = flag;
    }

    public String getOrder_id() {
        return order_id;
    }

    public void setOrder_id(String order_id) {
        this.order_id = order_id;
    }

    public String getItem_id() {
        return item_id;
    }

    public void setItem_id(String item_id) {
        this.item_id = item_id;
    }

    public String getAmount() {
        return amount;
    }

    public void setAmount(String amount) {
        this.amount = amount;
    }

    public String getItem_type() {
        return item_type;
    }

    public void setItem_type(String item_type) {
        this.item_type = item_type;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }

    /** Serializes all five fields; writeUTF requires every field to be non-null. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(order_id);
        out.writeUTF(item_id);
        out.writeUTF(amount);
        out.writeUTF(item_type);
        out.writeUTF(flag);
    }

    /** Reads the fields back in exactly the order {@link #write} emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.order_id = in.readUTF();
        this.item_id = in.readUTF();
        this.amount = in.readUTF();
        this.item_type = in.readUTF();
        this.flag = in.readUTF();
    }

    @Override
    public String toString() {
        return order_id + "\t" + item_id + "\t" + amount + "\t" + item_type + "\t" + flag;
    }

    /**
     * Orders beans by numeric order_id when both sides are numeric.
     *
     * BUG FIX: the original always called Integer.parseInt(order_id), which
     * throws NumberFormatException for iteminfo-side records whose order_id is
     * the empty string. Fall back to lexicographic comparison when either side
     * is not a number, and use Integer.compare for the numeric case.
     */
    @Override
    public int compareTo(Joinbean other) {
        try {
            return Integer.compare(Integer.parseInt(this.order_id), Integer.parseInt(other.order_id));
        } catch (NumberFormatException e) {
            return this.order_id.compareTo(other.order_id);
        }
    }
}
/**
 * Map side of the reduce-side join. Reads lines from either input file,
 * determines the source table from the input split's file name, tags each
 * record with a flag ("0" = detail, "1" = iteminfo) and emits it keyed by
 * item_id so that matching records meet in the same reduce call.
 */
class ReduceJoinMapper extends Mapper<LongWritable, Text, Text, Joinbean> {

    // Reused output key: avoids allocating a new Text object per input record.
    private final Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Defaults: fields absent from the source record stay "", flag defaults to "0".
        String order_id = "";
        String item_id = "";
        String amount = "";
        String item_type = "";
        String flag = "0";
        String[] fields = value.toString().split(",");
        // Identify the source table from the name of the file this split came from.
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        String fileName = inputSplit.getPath().getName();
        if ("detail".equals(fileName)) {
            // detail record: order_id,item_id,amount — flag stays "0".
            order_id = fields[0];
            item_id = fields[1];
            amount = fields[2];
        } else {
            // iteminfo record: item_id,item_type — flag set to "1".
            item_id = fields[0];
            item_type = fields[1];
            flag = "1";
        }
        // Emit keyed by item_id so both tables' rows for one item co-locate in reduce.
        // (Debug System.out.println removed — it flooded task stdout per record.)
        Joinbean bean = new Joinbean(order_id, item_id, amount, item_type, flag);
        outKey.set(item_id);
        context.write(outKey, bean);
    }
}
/**
 * Reduce side of the join. All beans sharing one item_id arrive together:
 * the (at most one) iteminfo bean supplies the item_type, which is stamped
 * onto every buffered detail bean before it is written out.
 */
class ReduceJoinReducer extends Reducer<Text, Joinbean, NullWritable, Joinbean> {

    @Override
    protected void reduce(Text key, Iterable<Joinbean> values, Context context)
            throws IOException, InterruptedException {
        String type = null;
        ArrayList<Joinbean> detailBeans = new ArrayList<Joinbean>();
        for (Joinbean joinbean : values) {
            if ("1".equals(joinbean.getFlag())) {
                // iteminfo record: remember the item_type for this item_id.
                type = joinbean.getItem_type();
            } else {
                // detail record. Hadoop reuses the value object across iterations,
                // so a defensive copy is required before buffering.
                detailBeans.add(new Joinbean(joinbean.getOrder_id(), joinbean.getItem_id(),
                        joinbean.getAmount(), joinbean.getItem_type(), joinbean.getFlag()));
            }
        }
        // BUG FIX: if no iteminfo record exists for this item_id, `type` stays
        // null and setItem_type(null) would later make writeUTF(null) throw a
        // NullPointerException during serialization. Keep the original empty
        // item_type in that case (left-join semantics instead of a crash).
        for (Joinbean bean : detailBeans) {
            if (type != null) {
                bean.setItem_type(type);
            }
            context.write(NullWritable.get(), bean);
        }
    }
}
/**
 * Driver for the reduce-side join job.
 *
 * Usage: ReduceJoin &lt;input path&gt; &lt;output path&gt;
 * Exits 0 on job success, 1 on job failure, 2 on bad arguments.
 */
public class ReduceJoin {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Robustness fix: fail fast with a usage message instead of an
        // ArrayIndexOutOfBoundsException when arguments are missing.
        if (args.length < 2) {
            System.err.println("Usage: ReduceJoin <input path> <output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "reduce-side join");
        job.setJarByClass(ReduceJoin.class);
        job.setMapperClass(ReduceJoinMapper.class);
        job.setReducerClass(ReduceJoinReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Joinbean.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Joinbean.class);
        // Remove a pre-existing output directory up front: Hadoop refuses to
        // overwrite, and deleting before submission keeps the intent obvious.
        Path outputPath = new Path(args[1]);
        FileSystem fileSystem = outputPath.getFileSystem(conf);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, outputPath);
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}