Hadoop MapReduce: an analysis of OutputFormat

OutputFormat is the base class for MapReduce output; every kind of MapReduce output is produced by an implementation of OutputFormat.

Common OutputFormat implementations are TextOutputFormat and SequenceFileOutputFormat.

1, TextOutputFormat (text output)

The default output format is TextOutputFormat, which writes each record as a line of text. Keys and values may be of any type, because TextOutputFormat calls their toString() method to convert them to strings.

2, SequenceFileOutputFormat

SequenceFileOutputFormat writes sequence files, a compact format that is easy to compress.

3, custom OutputFormat

(1) scenario

To control the final output path and output format of the result files, you can write a custom OutputFormat.

For example, when a job needs to write two different kinds of result data to different locations, you can use a custom OutputFormat.

(2) Steps for writing a custom OutputFormat

1) Define a custom class that extends FileOutputFormat

/**
 * Output format whose records are written by a {@link FilterRecordWriter}.
 */
public class FilterOutputFormat extends FileOutputFormat<Text,NullWritable>{

   /**
    * Creates the record writer for a task attempt.
    *
    * @param context the task attempt context, handed straight to the writer's constructor
    * @return a newly constructed {@code FilterRecordWriter}
    */
   @Override
   public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context)
         throws IOException, InterruptedException {
      final RecordWriter<Text, NullWritable> writer = new FilterRecordWriter(context);
      return writer;
   }

}

2) Extend RecordWriter, overriding in particular the write() method that outputs the data

/**
 * Record writer that splits incoming records between two files: records whose
 * text contains {@code www.123.com} go to {@code 123.log}, all others go to
 * {@code other.log}.
 *
 * <p>Records are written exactly as received; no line terminator is appended here.
 */
public class FilterRecordWriter extends RecordWriter<Text,NullWritable>{

   private final FSDataOutputStream hadoopOutputStream;
   private final FSDataOutputStream otherOutputStream;

   /**
    * Opens both output files on the file system selected by the task configuration.
    *
    * @param context task attempt context whose configuration is used to obtain the file system
    * @throws IllegalStateException if either output file cannot be created; the
    *         underlying {@link IOException} is attached as the cause
    */
   public FilterRecordWriter(TaskAttemptContext context) {
      FSDataOutputStream matched = null;
      FSDataOutputStream unmatched = null;
      try {
         // Get file system
         FileSystem fileSystem = FileSystem.get(context.getConfiguration());
         // create the output file path
         // NOTE(review): the paths are fixed, so every task attempt targets the same
         // two files -- confirm the job is meant to run with a single reducer.
         Path hadoopPath = new Path("/mapreduce/outputFormat/output/123.log");
         Path otherPath = new Path("/mapreduce/outputFormat/output/other.log");
         matched = fileSystem.create(hadoopPath);
         unmatched = fileSystem.create(otherPath);
      } catch (IOException e) {
         // Fail fast instead of leaving null streams behind (which would surface
         // later as a NullPointerException in write), and do not leak the first
         // stream when the second create fails.
         IOUtils.closeStream(matched);
         throw new IllegalStateException("Unable to open filter output files", e);
      }
      hadoopOutputStream = matched;
      otherOutputStream = unmatched;
   }

   /**
    * Writes the record to the file chosen by its content.
    *
    * @param text the record to write
    * @param writable unused placeholder value
    * @throws IOException if the underlying stream rejects the write
    */
   @Override
   public void write(Text text, NullWritable writable) throws IOException, InterruptedException {
      FSDataOutputStream target =
            text.toString().contains("www.123.com") ? hadoopOutputStream : otherOutputStream;
      // Text already holds UTF-8 bytes; write them directly rather than re-encoding
      // through the platform default charset. Only the first getLength() bytes of
      // the backing array are valid.
      target.write(text.getBytes(), 0, text.getLength());
   }

   /**
    * Closes both output streams. The second stream is closed even if closing the
    * first one fails, and a close failure is reported to the caller.
    *
    * @param context the task attempt context (not used)
    * @throws IOException if closing either stream fails
    */
   @Override
   public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      try {
         hadoopOutputStream.close();
      } finally {
         otherOutputStream.close();
      }
   }

}
/**
 * Mapper that forwards the text of every input record as the output key,
 * paired with a {@code NullWritable} value.
 */
public class FilterMapper extends Mapper<LongWritable, Text, Text, NullWritable>{

   /**
    * Emits the string form of {@code value} as the key; the input key is not used.
    */
   @Override
   protected void map(LongWritable key, Text value,Context context)
         throws IOException, InterruptedException {
      final Text outputKey = new Text(value.toString());
      context.write(outputKey, NullWritable.get());
   }
}
/**
 * Reducer that writes its key once for every grouped value, with a CR LF
 * line terminator appended to the key text.
 */
public class FilterReduce extends Reducer<Text, NullWritable, Text, NullWritable>{

   @Override
   protected void reduce(Text text, Iterable<NullWritable> iterable,
         Context context) throws IOException, InterruptedException {
      // One write per value, so duplicate input lines are kept rather than collapsed.
      for (NullWritable ignored : iterable) {
         final String terminatedLine = text.toString() + "\r\n";
         final Text outputKey = new Text(terminatedLine);
         context.write(outputKey, NullWritable.get());
      }
   }
}
/**
 * Configures and runs the filter job, then exits with status 0 on success
 * and 1 on failure.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
   System.setProperty("HADOOP_USER_NAME", "root");
   Configuration configuration = new Configuration();
   Job job = Job.getInstance(configuration);
   // Identify the jar that carries the job classes so that task JVMs on a
   // cluster can load the mapper, reducer and output format.
   job.setJarByClass(FilterMapper.class);
   job.setOutputFormatClass(FilterOutputFormat.class);
   job.setMapperClass(FilterMapper.class);
   job.setMapOutputKeyClass(Text.class);
   job.setMapOutputValueClass(NullWritable.class);
   job.setReducerClass(FilterReduce.class);
   job.setOutputKeyClass(Text.class);
   job.setOutputValueClass(NullWritable.class);
   FileInputFormat.setInputPaths(job, new Path("/mapreduce/outputFormat/log"));
   FileOutputFormat.setOutputPath(job, new Path("/mapreduce/outputFormat/output"));
   boolean succeeded = job.waitForCompletion(true);
   System.exit(succeeded ? 0 : 1);
}

 

Published 63 original articles · won praise 2 · Views 2722

Guess you like

Origin blog.csdn.net/zuodaoyong/article/details/104112949