MapJoin Example in MapReduce

A map-side join ships the small table to every mapper through the distributed cache and performs the join in the map phase, so the job needs no reduce stage.

Custom Writable class


package groupby;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class join implements Writable {

private int empno;
private String ename;
private String deptno;
private String deptname;
private String flage; // table flag (always empty in this map-side join; spelling kept from the original)

public join(int empno, String ename, String deptno, String deptname, String flage) {
    this.empno = empno;
    this.ename = ename;
    this.deptno = deptno;
    this.deptname = deptname;
    this.flage = flage;
}

public join(){}

public int getEmpno() {
    return empno;
}

public void setEmpno(int empno) {
    this.empno = empno;
}

public String getEname() {
    return ename;
}

public void setEname(String ename) {
    this.ename = ename;
}

public String getDeptno() {
    return deptno;
}

public void setDeptno(String deptno) {
    this.deptno = deptno;
}

public String getDeptname() {
    return deptname;
}

public void setDeptname(String deptname) {
    this.deptname = deptname;
}

public String getFlage() {
    return flage;
}

public void setFlage(String flage) {
    this.flage = flage;
}

@Override
public String toString() {
    return empno + "\t" + ename + "\t" + deptno + "\t" + deptname;
}

// write() and readFields() must use the same field order and types.
@Override
public void write(DataOutput out) throws IOException {
    out.writeInt(empno);
    out.writeUTF(ename);
    out.writeUTF(deptno);
    out.writeUTF(deptname);
    out.writeUTF(flage);
}

// Read the fields back in exactly the order write() emitted them.
@Override
public void readFields(DataInput in) throws IOException {
    this.empno = in.readInt();
    this.ename = in.readUTF() ;
    this.deptno = in.readUTF() ;
    this.deptname = in.readUTF();
    this.flage = in.readUTF() ;
}

}
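
Before wiring this class into a job, its Writable contract can be checked with a quick in-memory round trip. The sketch below is not part of the original post: the harness class name and the sample values are assumptions, and only join and its field order come from the code above.

package groupby;

import java.io.*;

// Hypothetical sanity check for the join Writable (not from the original post).
public class joinRoundTrip {
    public static void main(String[] args) throws IOException {
        // Sample values are invented; the argument order matches the constructor above.
        join original = new join(7369, "SMITH", "20", "RESEARCH", "");

        // Serialize with the same write() that Hadoop calls when persisting the value.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh instance via readFields().
        join copy = new join();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        System.out.println(original); // 7369	SMITH	20	RESEARCH
        System.out.println(copy);     // must print the same line
    }
}

If the two printed lines differ, write() and readFields() have drifted out of sync.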


MapJoin driver and mapper code


package groupby;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;

import static org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getOutputPath;

public class mapJoin {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {

    String input = "data1/emp.txt" ;
    String output = "out2" ;

    final Configuration co = new Configuration() ;
    // Get the Job instance
    final Job job = Job.getInstance(co);

    // Add the small table (dept.txt) to the distributed cache
    job.addCacheFile(new URI("data1/dept.txt"));

    // Set the driver class
    job.setJarByClass(mapJoin.class);
    // Set the Mapper; this is a map-only job, so no Reducer is configured
    job.setMapperClass(MyMapper.class);
    // Disable the reduce phase so the mapper output is written straight to the output files
    job.setNumReduceTasks(0);

    // Key and value types of the Mapper output
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(join.class);

    // Output types for a reduce phase are not needed in this map-only job
    /* job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(join.class); */

    // Input and output paths
    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));

    // Delete the output path if it already exists; MapReduce refuses to overwrite it.
    // FileSystem.delete works for both the local filesystem and HDFS, unlike java.io.File,
    // and removes nested directories in one call.
    Path outDir = getOutputPath(job);
    FileSystem fs = outDir.getFileSystem(job.getConfiguration());
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }

    // Submit the job and wait for completion
    final boolean result = job.waitForCompletion(true);
    System.exit(result ? 0 : 1);

}

/**
 * Mapper<LongWritable, Text, NullWritable, join>
 * input key:    byte offset of the line in the file
 * input value:  one line of emp.txt
 * output key:   NullWritable (no grouping key is needed)
 * output value: the joined record (the join class above)
 */
public static class MyMapper extends Mapper<LongWritable, Text, NullWritable, join> {

    // deptno -> deptname lookup, filled from the cached dept.txt in setup()
    private HashMap<String, String> catchFile = new HashMap<String, String>();

    @Override
    protected void setup(Context context) throws IOException {
        // Resolve the local path of the cached small table (dept.txt)
        String path = context.getCacheFiles()[0].getPath();

        BufferedReader read = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        String line;
        while (StringUtils.isNotEmpty(line = read.readLine())) {
            String[] sp = line.split("\t");
            catchFile.put(sp[0].trim(), sp[1].trim()); // deptno -> deptname
        }
        read.close();
    }

    // emp.txt layout: empno in column 0, ename in column 1, deptno in column 7 (8 tab-separated fields)
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] info = value.toString().split("\t");
        if (info.length == 8) {
            // Join against the in-memory dept map; an unknown deptno yields an empty deptname
            context.write(NullWritable.get(),
                    new join(Integer.valueOf(info[0].trim()), info[1].trim(), info[7].trim(),
                            catchFile.getOrDefault(info[7].trim(), ""), ""));
        }
    }
}

}
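
For reference, the parsing logic above implies the following file layouts. The rows below are invented sample data; only the column positions come from the code (dept.txt: deptno and deptname; emp.txt: 8 tab-separated columns with empno in column 0, ename in column 1 and deptno in column 7).

data1/dept.txt (tab-separated):
10	ACCOUNTING
20	RESEARCH

data1/emp.txt (tab-separated, 8 columns; only columns 0, 1 and 7 are read):
7369	SMITH	CLERK	7902	1980-12-17	800	0	20

Line written to out2 for that row (see join.toString()):
7369	SMITH	20	RESEARCH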

Reposted from blog.51cto.com/15084467/2645410