输入数据
输出数据：把含有 kxj 的日志单独存放到指定的文件里，其它日志存到另一个文件里。
package com.buba.mapreduce.filter;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FilterOutputformat extends FileOutputFormat<Text, NullWritable> {

    /**
     * Supplies the custom record writer that routes each output line into one
     * of two files, depending on whether the line contains "kxj".
     *
     * @param taskAttemptContext the current task context, forwarded to the writer
     * @return a {@link FilterRecordWriter} bound to this task attempt
     */
    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        FilterRecordWriter writer = new FilterRecordWriter(taskAttemptContext);
        return writer;
    }
}
package com.buba.mapreduce.filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
public class FilterRecordWriter extends RecordWriter<Text, NullWritable> {

    /** Stream for log lines that contain the keyword "kxj". */
    private FSDataOutputStream kxjOut = null;
    /** Stream for every other log line. */
    private FSDataOutputStream otherOut = null;

    /**
     * Opens the two destination streams this writer routes records to.
     *
     * @param job the task context, used to obtain the job configuration
     * @throws RuntimeException if either stream cannot be created. The original
     *         code only called printStackTrace(), which left both streams null
     *         and made every later write() fail with an uninformative NPE.
     */
    public FilterRecordWriter(TaskAttemptContext job) {
        Configuration configuration = job.getConfiguration();
        try {
            // Get the file system for the job's configuration.
            FileSystem fileSystem = FileSystem.get(configuration);
            // Create the two output files.
            // NOTE(review): paths are hard-coded; consider deriving them from
            // FileOutputFormat.getOutputPath(job) so the job's output directory
            // argument is honored.
            kxjOut = fileSystem.create(new Path("f:/output5/kxj.log"));
            otherOut = fileSystem.create(new Path("f:/output5/other.log"));
        } catch (Exception e) {
            // Fail fast and preserve the cause instead of swallowing it.
            throw new RuntimeException("Failed to create filter output streams", e);
        }
    }

    /**
     * Routes one record: lines containing "kxj" go to kxj.log, all others to
     * other.log. The record separator is expected to already be part of the
     * key (the reducer appends "\r\n").
     */
    @Override
    public void write(Text key, NullWritable value) throws IOException, InterruptedException {
        if (key.toString().contains("kxj")) {
            // Use Text's UTF-8 backing buffer (bounded by getLength()) rather
            // than toString().getBytes(), which depends on the platform charset.
            kxjOut.write(key.getBytes(), 0, key.getLength());
        } else {
            otherOut.write(key.getBytes(), 0, key.getLength());
        }
    }

    /**
     * Closes both streams. The try/finally guarantees otherOut is closed even
     * when closing kxjOut throws (the original would have leaked it).
     */
    @Override
    public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        try {
            if (kxjOut != null) {
                kxjOut.close();
            }
        } finally {
            if (otherOut != null) {
                otherOut.close();
            }
        }
    }
}
package com.buba.mapreduce.filter;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FilterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /** NullWritable is a singleton; hoist it so the lookup is done once. */
    private static final NullWritable NULL_VALUE = NullWritable.get();

    /**
     * Passes each input line through unchanged as the output key; the actual
     * kxj/other filtering happens later in the custom output format.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        context.write(value, NULL_VALUE);
    }
}
package com.buba.mapreduce.filter;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FilterReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

    /** Reusable output key — avoids allocating a new Text per reduce call. */
    private final Text outKey = new Text();

    /**
     * Emits each log line, appending "\r\n" because the custom RecordWriter
     * writes raw bytes with no record separator of its own.
     *
     * The original wrote the key exactly once per group, which silently
     * de-duplicated identical log lines; iterating over values preserves
     * every occurrence from the input.
     */
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        outKey.set(key.toString() + "\r\n");
        for (NullWritable value : values) {
            context.write(outKey, value);
        }
    }
}
package com.buba.mapreduce.filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class FilterDriver {

    /**
     * Configures and submits the kxj log-filter job.
     *
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException
        // when the paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: FilterDriver <input path> <output path>");
            System.exit(2);
        }
        // 1. Create the job from a fresh configuration.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        // 2. Locate the jar containing this driver class.
        job.setJarByClass(FilterDriver.class);
        // 3. Wire up the mapper and reducer.
        job.setMapperClass(FilterMapper.class);
        job.setReducerClass(FilterReducer.class);
        // 4. Map-phase output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        // 5. Final output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Install the custom output format that splits records into two files.
        job.setOutputFormatClass(FilterOutputformat.class);
        // 6. Input/output paths. The output path is still required by
        //    FileOutputFormat (e.g. for the _SUCCESS marker) even though the
        //    custom record writer opens its own hard-coded files.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 7. Submit and block until completion; exit code reflects success.
        boolean completed = job.waitForCompletion(true);
        System.exit(completed ? 0 : 1);
    }
}