Section 3 MapReduce Advanced: 7. Custom OutputFormat — writing output to different folders

2.1 Requirements

There is some order-review data in which good and bad reviews need to be separated into different folders to produce the final data. In each record, the ninth field holds the review status: 0 = good, 1 = average, 2 = poor.

Data contents similar to the following:

12018-03-15 22:29:06 2018-03-15 22:29:06 I would like to come back a \ N 1 3 hello Come and it 02018-03-14 22:29:03
22018-03- 15 22:42:08 2018-03-15 22:42:08 good \ N 1 1 Add a right to say let's go and walked 02018-03-14 22:42:04

2.2 Analysis

The key point of this program is that a single MapReduce job must write its output to different directories depending on the type of each record. Such a flexible output requirement can be met with a custom OutputFormat.

2.3 Implementation

Implementation points:

1. Accessing external resources (e.g. the file system) from within a MapReduce program.

2. Defining a custom OutputFormat, overriding its getRecordWriter() method, and overriding write() in a custom RecordWriter to control where each record is written.

 

 Code:

MyOutputFormatMain :
package cn.itcast.demo4.outputformat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver for the custom-OutputFormat demo job: reads comment lines as plain
 * text and lets {@link MyOutputFormat} route each record to a "good" or "bad"
 * output file.
 */
public class MyOutputFormatMain extends Configured implements Tool {

    // Defaults preserved from the original hard-coded local paths; used when
    // no command-line arguments are supplied.
    private static final String DEFAULT_INPUT =
            "file:///D:\\Study\\BigData\\heima\\stage2\\5、大数据离线第五天\\自定义outputformat\\input";
    private static final String DEFAULT_OUTPUT =
            "file:///D:\\Study\\BigData\\heima\\stage2\\5、大数据离线第五天\\自定义outputformat\\output";

    /**
     * Configures and submits the job.
     *
     * @param args optional paths: args[0] = input directory, args[1] = output
     *             directory; the hard-coded defaults are used when absent
     * @return 0 on success, 1 on failure
     */
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), MyOutputFormatMain.class.getSimpleName());
        // Required so the framework can locate the job jar when submitted to a
        // cluster (the original code had this commented out and referencing the
        // wrong class, MyInputFormatMain).
        job.setJarByClass(MyOutputFormatMain.class);

        String input = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String output = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(input));

        job.setMapperClass(MyOutputFormatMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // The custom OutputFormat splits records into good/bad comment files;
        // the path below is the job's nominal output directory.
        job.setOutputFormatClass(MyOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(output));

        boolean completed = job.waitForCompletion(true);
        return completed ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new MyOutputFormatMain(), args);
        System.exit(exitCode);
    }
}


MyOutputFormatMapper:
package cn.itcast.demo4.outputformat;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Pass-through mapper: emits every input line unchanged as the output key with
 * a NullWritable value. All routing logic lives in the custom OutputFormat.
 */
public class MyOutputFormatMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // The byte offset is irrelevant here; forward only the line content.
        context.write(line, NullWritable.get());
    }
}

MyOutputFormat:
package cn.itcast.demo4.outputformat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MyOutputFormat extends FileOutputFormat<Text,NullWritable> {

@Override
RecordWriter public <the Text, NullWritable> getRecordWriter (TaskAttemptContext context) throws IOException, InterruptedException {
the Configuration context.getConfiguration the conf = ();
the FileSystem FileSystem = FileSystem.get (the conf);
FSDataOutputStream goodComment = fileSystem.create (new new the Path ( "File: / // D: \\ Study \\ BigData \\ heima \\ stage2 \\ 5, the fifth day of large data offline goodComment \\ \\ \\ custom goodComment.txt outputFormat "));

FSDataOutputStream badComment fileSystem.create = ( new Path ( "file: /// D : \\ Study \\ BigData \\ heima \\ stage2 \\ 5, the fifth day off large data \\ custom outputformat \\ badComment \\ badComment.txt")) ;

= new new myRecordWriter myRecordWriter myRecordWriter (goodComment, badComment);
return myRecordWriter;
}
}

MyRecordWriter:
cn.itcast.demo4.outputformat Package; 

Import org.apache.commons.io.IOUtils;
Import org.apache.hadoop.fs.FSDataOutputStream;
Import org.apache.hadoop.io.NullWritable;
Import org.apache.hadoop.io .text;
Import org.apache.hadoop.mapreduce.RecordWriter;
Import org.apache.hadoop.mapreduce.TaskAttemptContext;

Import java.io.IOException;

public class MyRecordWriter the extends RecordWriter <the Text, NullWritable> {
// we define two outputs stream, are output to go to a different folder below
Private FSDataOutputStream goodStream;
Private FSDataOutputStream badStream;

public MyRecordWriter (FSDataOutputStream goodStream, FSDataOutputStream badStream) {
this.goodStream = goodStream;
= badStream this.badStream;
}

/ **
* We will write out data
* @param text we have a line of comment data
* @param nullWritable
* @throws IOException
* @throws InterruptedException
* /
@Override
public void the Write (Text text, NullWritable nullWritable) throws IOException, InterruptedException {
// 1 2018-03-15 22:29:06 2018-03-15 22:29:06 I would like to come back a \ N 1 3 hello come and it 02018-03-1422 : 29: 03
String [] = text.toString Split () Split ( "\ T");.
Integer = the Integer.parseInt commentStatus (Split [. 9]);

IF (commentStatus <=. 1) {// Good 0, the data Comments 1
// the output stream, out to write out data
goodStream.write(text.toString().getBytes());
goodStream.write("\r\n".getBytes());//换行
}else{//差评2数据
badStream.write(text.toString().getBytes());
badStream.write("\r\n".getBytes());
}

}

@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
IOUtils.closeQuietly(goodStream);
IOUtils.closeQuietly(badStream);
}
}

Guess you like

Origin www.cnblogs.com/mediocreWorld/p/11041026.html