OutputFormat is the base class for MapReduce output: every output class implements the MapReduce OutputFormat interface.
Common OutputFormat implementation classes include TextOutputFormat and SequenceFileOutputFormat.
1. TextOutputFormat (text output)
TextOutputFormat is the default output format; it writes each record as a line of text. Keys and values may be of any type, because TextOutputFormat calls their toString() method to convert them to strings.
2. SequenceFileOutputFormat
Its binary format is compact and easily compressed.
3. Custom OutputFormat
(1) Use cases
To control the final output file path and output format, you can implement a custom OutputFormat.
For example, when different result data must go to different directories, a custom OutputFormat can route each record to the appropriate output file.
(2) Steps to implement a custom OutputFormat
1) Create a class that extends FileOutputFormat
/**
 * Custom OutputFormat that routes records to different files.
 * Keys are the log lines (Text); values carry no information (NullWritable).
 */
public class FilterOutputFormat extends FileOutputFormat<Text, NullWritable> {

    /**
     * Creates the RecordWriter that performs the actual routing and writing.
     *
     * @param context task attempt context supplying the job configuration
     * @return a FilterRecordWriter bound to this task attempt
     */
    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new FilterRecordWriter(context);
    }
}
2) Extend RecordWriter and override the write() method, which outputs the data
public class FilterRecordWriter extends RecordWriter<Text,NullWritable>{ private FSDataOutputStream hadoopOutputStream=null; private FSDataOutputStream otherOutputStream=null; @Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { IOUtils.closeStream(hadoopOutputStream); IOUtils.closeStream(otherOutputStream); } @Override public void write(Text text, NullWritable writable) throws IOException, InterruptedException { if(text.toString().contains("www.123.com")){ hadoopOutputStream.write(text.toString().getBytes()); }else{ otherOutputStream.write(text.toString().getBytes()); } } public FilterRecordWriter(TaskAttemptContext context) { FileSystem fileSystem=null; try { // Get file system fileSystem = FileSystem.get(context.getConfiguration()); // create the output file path Path hadoopPath = new Path("/mapreduce/outputFormat/output/123.log"); Path otherPath = new Path("/mapreduce/outputFormat/output/other.log"); hadoopOutputStream=fileSystem.create(hadoopPath); otherOutputStream=fileSystem.create(otherPath); } catch (IOException e) { e.printStackTrace (); } } }
/**
 * Mapper that forwards every input line unchanged as the output key.
 * The value is NullWritable because only the line text matters downstream.
 */
public class FilterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit the raw line as the key; the value carries no information.
        String currentLine = value.toString();
        context.write(new Text(currentLine), NullWritable.get());
    }
}
public class FilterReduce extends Reducer<Text, NullWritable, Text, NullWritable>{ @Override protected void reduce(Text text, Iterable<NullWritable> iterable, Context context) throws IOException, InterruptedException { // prevent duplicate text to be filtered out for(NullWritable nullWritable:iterable){ context.write(new Text(text.toString()+"\r\n"), NullWritable.get()); } } }
/**
 * Job driver: wires the filter mapper/reducer and the custom output format,
 * then runs the job and exits with 0 on success, 1 on failure.
 *
 * @param args unused
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    // Run as "root" so the job may write under /mapreduce on HDFS.
    System.setProperty("HADOOP_USER_NAME", "root");

    Configuration configuration = new Configuration();
    Job job = Job.getInstance(configuration);

    // Use the custom output format that splits records across two files.
    job.setOutputFormatClass(FilterOutputFormat.class);

    job.setMapperClass(FilterMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setReducerClass(FilterReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    FileInputFormat.setInputPaths(job, new Path("/mapreduce/outputFormat/log"));
    // NOTE(review): an output path is still set even though the RecordWriter
    // writes to hard-coded files — presumably needed for job bookkeeping
    // (e.g. the success marker); confirm before removing.
    FileOutputFormat.setOutputPath(job, new Path("/mapreduce/outputFormat/output"));

    boolean succeeded = job.waitForCompletion(true);
    // Was "succeeded == true ? 0 : 1" — the comparison was redundant.
    System.exit(succeeded ? 0 : 1);
}