Hadoop custom OutputFormat

1. Extend FileOutputFormat and override the getRecordWriter method

/ ** 
 * @Description: Custom outputFormat, output data to a different file 
 * / 
public  class FilterOutputFormat the extends FileOutputFormat <the Text, NullWritable> { 
    @Override 
    public RecordWriter <the Text, NullWritable> getRecordWriter (TaskAttemptContext Job) throws IOException, InterruptedException {
         return  new new FRecordWriter (Job); 
    } 
}

2. Implement the RecordWriter

/ ** 
 * @Description: inheritance RecordWriter, to achieve the output data to a different directory file 
 * / 
public  class FRecordWriter the extends RecordWriter <the Text, NullWritable> { 
    FSDataOutputStream OUT1 of = null ; 
    FSDataOutputStream OUT2 of the = null ; 

    @Override 
    public  void Write (the Text Key, NullWritable value) throws IOException, InterruptedException {
         // determines whether contains "baidu" and "alibaba" string, file output to a different 
        iF (key.toString (). the contains ( "baidu") || key.toString (). the contains ( "alibaba" )) { 
            out1.write (key.toString () the getBytes ()).; 
        } the else {
            out2.write(key.toString().getBytes());
        }

    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        IOUtils.closeStream(out1);
        IOUtils.closeStream(out2);
    }

    public FRecordWriter(TaskAttemptContext job) {
        FileSystem fs;
        try {
            Path path1 = new Path("output1/a.log");
            Path path2 = new Path("output2/b.log");
            System.out.println(path1.getName());
            System.out.println(path2.getName());
            fs = FileSystem.get(job.getConfiguration());
            out1 = fs.create(path1);
            out2 = fs.create(path2);
        }catch (Exception e){
            e.printStackTrace();
        }

    }
}

3. Mapper

/**
 * Identity-style mapper: emits every input line unchanged as the output key,
 * with a NullWritable value (line-by-line read, line-by-line write).
 */
public class FilterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // NullWritable is a singleton; cache the reference for reuse.
    private static final NullWritable EMPTY = NullWritable.get();

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // The whole line becomes the key; dedup/grouping happens in the reducer.
        context.write(line, EMPTY);
    }
}

4. Reducer

public  class FilterReducer the extends the Reducer <the Text, NullWritable, the Text, NullWritable> {
     Private the Text newLine = new new the Text (); 
    @Override 
    protected  void the reduce (the Text Key, the Iterable <NullWritable> values, the Context context) throws IOException, InterruptedException { 

        // cycle null values in the key value is to prevent duplicate data is not removed
         // the Iterable <NullWritable> values iterator, and stores the key value (although in the present embodiment is a value null value)
         // through the loop iterator, iterator in the key value will be assigned to continuously withdrawn in the Text key (public address memory) 
        for (NullWritable value: values) { 
            newLine.set (key.toString () + "\ R & lt \ n-" );
            context.write(newLine,value);
        }
    }
}

5. Driver

/ ** 
 * @Description: Custom Output 
 * Sample achieved by dividing line, comprising determining whether or baidu alibaba string 
 * 1 contains the directory is written, written to the directory does not contain 2, 
 * / 
public  class FilterDriver { 

   public  static  void main (String args []) throws Exception {
        IF (! args.length = 2 ) 
       { 
           System.err.println ( "using the format: FilterDriver <INPUT path> <Output path>" ); 
           System.exit ( -1 ) ; 
       } 


       the Configuration the conf = new new the Configuration (); 
       the Job Job = Job.getInstance (the conf);

       job.setJarByClass(FilterDriver.class);
       job.setMapperClass(FilterMapper.class);
       job.setReducerClass(FilterReducer.class);

       job.setMapOutputKeyClass(Text .class);
       job.setMapOutputValueClass(NullWritable .class);

       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(NullWritable.class);

       // 要将自定义的输出格式组件设置到job中
       job.setOutputFormatClass(FilterOutputFormat.class);

       FileInputFormat.setInputPaths(job, new Path(args[0])); 

       // Although we have customized outputformat, but because our outputformat inherited from fileoutputformat
        // and fileoutputformat _SUCCESS to output a file, so this will have to specify an output directory 
       FileOutputFormat.setOutputPath (the Job, new new Path (args [. 1 ])); 

       the Path outPath = new new the Path (args [. 1 ]); 
       the FileSystem FS = FileSystem.get (the conf);
        IF (fs.exists (outPath)) { 
           fs.delete (outPath, to true ); 
       } 

       Boolean Result = job.waitForCompletion ( to true ); 
       System.exit (Result 0:?. 1 ); 
   } 


}

 

 

Related reading

Origin www.cnblogs.com/asker009/p/11440866.html