1. Extend FileOutputFormat and override the getRecordWriter method
/ ** * @Description: Custom outputFormat, output data to a different file * / public class FilterOutputFormat the extends FileOutputFormat <the Text, NullWritable> { @Override public RecordWriter <the Text, NullWritable> getRecordWriter (TaskAttemptContext Job) throws IOException, InterruptedException { return new new FRecordWriter (Job); } }
2. Implement the RecordWriter
/ ** * @Description: inheritance RecordWriter, to achieve the output data to a different directory file * / public class FRecordWriter the extends RecordWriter <the Text, NullWritable> { FSDataOutputStream OUT1 of = null ; FSDataOutputStream OUT2 of the = null ; @Override public void Write (the Text Key, NullWritable value) throws IOException, InterruptedException { // determines whether contains "baidu" and "alibaba" string, file output to a different iF (key.toString (). the contains ( "baidu") || key.toString (). the contains ( "alibaba" )) { out1.write (key.toString () the getBytes ()).; } the else { out2.write(key.toString().getBytes()); } } @Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { IOUtils.closeStream(out1); IOUtils.closeStream(out2); } public FRecordWriter(TaskAttemptContext job) { FileSystem fs; try { Path path1 = new Path("output1/a.log"); Path path2 = new Path("output2/b.log"); System.out.println(path1.getName()); System.out.println(path2.getName()); fs = FileSystem.get(job.getConfiguration()); out1 = fs.create(path1); out2 = fs.create(path2); }catch (Exception e){ e.printStackTrace(); } } }
3. Mapper
/**
 * Pass-through mapper: each input line is emitted unchanged as the output key,
 * paired with a {@link NullWritable} value.
 */
public class FilterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the incoming line as the key with an empty value.
     *
     * @param key     the input key (not used)
     * @param value   the line of text to forward
     * @param context the context that receives the output pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        final NullWritable empty = NullWritable.get();
        context.write(value, empty);
    }
}
4. Reducer
public class FilterReducer the extends the Reducer <the Text, NullWritable, the Text, NullWritable> { Private the Text newLine = new new the Text (); @Override protected void the reduce (the Text Key, the Iterable <NullWritable> values, the Context context) throws IOException, InterruptedException { // cycle null values in the key value is to prevent duplicate data is not removed // the Iterable <NullWritable> values iterator, and stores the key value (although in the present embodiment is a value null value) // through the loop iterator, iterator in the key value will be assigned to continuously withdrawn in the Text key (public address memory) for (NullWritable value: values) { newLine.set (key.toString () + "\ R & lt \ n-" ); context.write(newLine,value); } } }
5. Driver
/ ** * @Description: Custom Output * Sample achieved by dividing line, comprising determining whether or baidu alibaba string * 1 contains the directory is written, written to the directory does not contain 2, * / public class FilterDriver { public static void main (String args []) throws Exception { IF (! args.length = 2 ) { System.err.println ( "using the format: FilterDriver <INPUT path> <Output path>" ); System.exit ( -1 ) ; } the Configuration the conf = new new the Configuration (); the Job Job = Job.getInstance (the conf); job.setJarByClass(FilterDriver.class); job.setMapperClass(FilterMapper.class); job.setReducerClass(FilterReducer.class); job.setMapOutputKeyClass(Text .class); job.setMapOutputValueClass(NullWritable .class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); // 要将自定义的输出格式组件设置到job中 job.setOutputFormatClass(FilterOutputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); // Although we have customized outputformat, but because our outputformat inherited from fileoutputformat // and fileoutputformat _SUCCESS to output a file, so this will have to specify an output directory FileOutputFormat.setOutputPath (the Job, new new Path (args [. 1 ])); the Path outPath = new new the Path (args [. 1 ]); the FileSystem FS = FileSystem.get (the conf); IF (fs.exists (outPath)) { fs.delete (outPath, to true ); } Boolean Result = job.waitForCompletion ( to true ); System.exit (Result 0:?. 1 ); } }