Custom InputFormat with SequenceFile Output in MapReduce

This example uses a custom InputFormat and RecordReader to read each input file as a single record, then writes the records out as a SequenceFile keyed by file name.

1. Driver class

package RecodReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

import java.io.IOException;

public class MyDrive {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Job job = Job.getInstance(new Configuration(), "MyDiy");

        job.setJarByClass(MyDrive.class);
        job.setMapperClass(MyMap.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);

        // The final output key/value classes must also be set; otherwise they
        // default to LongWritable/Text and the SequenceFile writer rejects the
        // Text/BytesWritable records produced here.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);

        // Set the InputFormat used to read the files
        job.setInputFormatClass(MyInputFormat.class);
        MyInputFormat.addInputPath(job, new Path("file:///<input path>"));

        // Set the output format to SequenceFile
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path("file:///<output path>"));

        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
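The driver above hard-codes both paths. A common variation, sketched below on the assumption that the same MyMap and MyInputFormat classes are on the classpath, is to extend Configured and implement Tool so the paths arrive as command-line arguments (the class name MyDriveTool is illustrative, not from the original):

package RecodReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MyDriveTool extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "MyDiy");
        job.setJarByClass(MyDriveTool.class);
        job.setMapperClass(MyMap.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(BytesWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);

        job.setInputFormatClass(MyInputFormat.class);
        // Input and output paths are taken from the command line
        MyInputFormat.addInputPath(job, new Path(args[0]));

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new MyDriveTool(), args));
    }
}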

2. Custom InputFormat class

package RecodReader;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;

public class MyInputFormat extends FileInputFormat<NullWritable, BytesWritable> {

    // Each file is read as a single record, so it must never be split
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }

    @Override
    public RecordReader<NullWritable, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        MyRR rr = new MyRR();
        rr.initialize(split, context);
        return rr;
    }
}

3. RecordReader class

package RecodReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class MyRR extends RecordReader<NullWritable, BytesWritable> {

    private FileSplit fileSplit;
    private Configuration conf;
    private final BytesWritable bytesWritable = new BytesWritable();
    private boolean processed = false;

    // Initialization: remember the split and the job configuration
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        fileSplit = (FileSplit) split;
        conf = context.getConfiguration();
    }

    // Read the next record. The whole file is returned as one record, so this
    // returns true exactly once and false on every subsequent call
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (!processed) {
            Path path = fileSplit.getPath();
            FileSystem fileSystem = FileSystem.get(conf);
            // Since the file is not splittable, the split length is the file length
            byte[] bytes = new byte[(int) fileSplit.getLength()];
            try (FSDataInputStream in = fileSystem.open(path)) {
                IOUtils.readFully(in, bytes, 0, bytes.length);
            }
            bytesWritable.set(bytes, 0, bytes.length);
            processed = true;
            return true;
        }
        return false;
    }

    @Override
    public NullWritable getCurrentKey() throws IOException, InterruptedException {
        return NullWritable.get();
    }

    @Override
    public BytesWritable getCurrentValue() throws IOException, InterruptedException {
        return bytesWritable;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        // Either nothing has been read yet, or the single record is done
        return processed ? 1.0f : 0.0f;
    }

    @Override
    public void close() throws IOException {
        // The input stream is already closed in nextKeyValue(); nothing to release
    }
}
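For intuition, here is a simplified sketch (not actual framework source) of the loop with which MapReduce consumes a RecordReader. With MyRR the loop body executes exactly once per file, since nextKeyValue() returns true only on the first call:

package RecodReader;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.IOException;

public class RecordReaderLoopSketch {

    // `split` and `context` are supplied by the framework at runtime
    static void runMapLoop(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        MyRR reader = new MyRR();
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {              // true exactly once per file here
            Object key = reader.getCurrentKey();     // NullWritable.get()
            Object value = reader.getCurrentValue(); // the whole file as a BytesWritable
            // the framework would invoke map(key, value, context) at this point
        }
        reader.close();
    }
}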

4. Mapper class

package RecodReader;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class MyMap extends Mapper<NullWritable, BytesWritable, Text, BytesWritable> {

    @Override
    protected void map(NullWritable key, BytesWritable value, Context context) throws IOException, InterruptedException {
        // Use the file name of the current split as the output key,
        // and the whole file content as the output value
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        String name = fileSplit.getPath().getName();
        context.write(new Text(name), value);
    }
}
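To check the result, the SequenceFile written by the job can be read back with Hadoop's SequenceFile.Reader. Below is a minimal sketch, assuming the path of one output part file is passed as the first argument; the class name ReadBack is illustrative, not from the original:

package RecodReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class ReadBack {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Path to one part file of the job output, e.g. <output path>/part-r-00000
        Path path = new Path(args[0]);
        try (SequenceFile.Reader reader =
                     new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            Text key = new Text();
            BytesWritable value = new BytesWritable();
            // Each record is one original file: its name and its full content
            while (reader.next(key, value)) {
                System.out.println(key + " -> " + value.getLength() + " bytes");
            }
        }
    }
}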
