MR file merge

This job merges the input files and removes duplicate lines: the mapper emits each line as the key with an empty value, and the reducer writes each distinct key exactly once.

package com.euphe.filter;

import com.euphe.util.HUtils;
import com.euphe.util.Utils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;

import java.io.IOException;

public class ReductionJob extends Configured implements Tool {
    public static class Map extends Mapper<Object, Text, Text, Text> {
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Emit the whole input line as the key with an empty value;
            // duplicate lines fall into the same reduce group and are collapsed there.
            context.write(value, new Text());
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Write each distinct line exactly once, discarding the duplicates in values.
            context.write(key, new Text());
        }
    }
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = HUtils.getConf();
        conf.set("mapreduce.job.jar", Utils.getRootPathBasedPath("WEB-INF/jars/redu.jar"));
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); // Parse command-line arguments
        if (otherArgs.length != 2) { // Requires two parameters: the input and output paths
            System.err.println("Usage: com.euphe.filter.ReductionJob <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "Reduction input:" + otherArgs[0] + " to " + otherArgs[1]);
        job.setJarByClass(ReductionJob.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setNumReduceTasks(1);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        FileSystem.get(conf).delete(new Path(otherArgs[1]), true); // Delete the output directory before submitting the job
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
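
The class implements Tool but the listing does not include a main method. A minimal standalone driver could look like the sketch below; ToolRunner is the standard Hadoop helper for running Tool implementations, and the class name ReductionJobDriver is only illustrative, not part of the original project.

package com.euphe.filter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical driver class; delegates option parsing and job submission to ReductionJob.run()
public class ReductionJobDriver {
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new ReductionJob(), args);
        System.exit(exitCode);
    }
}

With such a driver packaged into the job jar, the job could be launched with something like: hadoop jar redu.jar com.euphe.filter.ReductionJobDriver <in> <out>.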

 
