1. 驱动类 Driver（MyDrive）代码
package RecordWrite;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
 * Driver: configures and submits a map-only job whose records are routed
 * to "good"/"bad" output files by the custom {@link MyOutPutfromat}.
 *
 * <p>Usage: {@code MyDrive <input path> <output path>} — the original
 * version hard-coded {@code new Path("")} for both, which throws
 * {@code IllegalArgumentException} at runtime; paths now come from args.
 */
public class MyDrive {
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: MyDrive <input path> <output path>");
            System.exit(2);
        }
        Job job = Job.getInstance(new Configuration(), "MyDrive");
        job.setJarByClass(MyDrive.class);
        job.setMapperClass(MyMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Map-only job: without this, the default identity Reducer runs and
        // adds a needless shuffle/sort phase before the custom OutputFormat.
        job.setNumReduceTasks(0);
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(args[0]));
        job.setOutputFormatClass(MyOutPutfromat.class);
        // setOutputPath is inherited from FileOutputFormat.
        MyOutPutfromat.setOutputPath(job, new Path(args[1]));
        boolean ok = job.waitForCompletion(true);
        System.exit(ok ? 0 : 1);
    }
}
2. 自定义输出 OutputFormat 类（MyOutPutfromat）代码
package RecordWrite;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class MyOutPutfromat extends FileOutputFormat<Text, NullWritable> {
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
FileSystem fileSystem = FileSystem.get(conf);
FSDataOutputStream fsdGood = fileSystem.create(new Path(""));
FSDataOutputStream fsdBad = fileSystem.create(new Path(""));
MyRW rw = new MyRW(fsdGood, fsdBad);
return rw;
}
3. RecordWriter 类（MyRW）代码
package RecordWrite;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
/**
 * RecordWriter that routes each line to one of two streams: lines whose
 * 10th tab-separated field equals "0" go to the "good" stream, everything
 * else to the "bad" stream.
 */
public class MyRW extends RecordWriter<Text, NullWritable> {
    // Stream for records whose 10th field is "0".
    private FSDataOutputStream fsdGood;
    // Stream for all other records (including malformed short rows).
    private FSDataOutputStream fsdBad;

    public MyRW(FSDataOutputStream fsdGood, FSDataOutputStream fsdBad) {
        this.fsdGood = fsdGood;
        this.fsdBad = fsdBad;
    }

    public MyRW() {
    }

    @Override
    public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
        String line = text.toString();
        String[] fields = line.split("\t");
        // Guard against short rows: the original indexed fields[9]
        // unconditionally and threw ArrayIndexOutOfBoundsException on lines
        // with fewer than 10 tab-separated fields; such rows now go to "bad".
        boolean good = fields.length > 9 && fields[9].equals("0");
        FSDataOutputStream target = good ? fsdGood : fsdBad;
        // Explicit UTF-8 instead of the platform default charset
        // (UnsupportedEncodingException is an IOException, already declared).
        target.write(line.getBytes("UTF-8"));
        target.write("\r\n".getBytes("UTF-8"));
    }

    @Override
    public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        if (fsdGood != null) {
            fsdGood.close();
        }
        if (fsdBad != null) {
            fsdBad.close();
        }
    }
}
4. Mapper 类（MyMap）代码
package RecordWrite;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Identity mapper: emits each input line unchanged as the key with a
 * NullWritable value; all good/bad routing happens in the OutputFormat.
 */
public class MyMap extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable offset, Text line, Context ctx) throws IOException, InterruptedException {
        // Pass the raw line straight through — no transformation here.
        ctx.write(line, NullWritable.get());
    }
}