目录
一、简介
这应该是 MapReduce 里最简单的例子,也是几乎所有人写的第一个 MapReduce 程序。
二、思路
map 阶段:对输入文本分词,把每个词作为 key、1 作为 value 输出。
reduce 阶段:把同一个词的所有 value 相加,得到该词出现的次数。
三、实现
/**
 * Driver that configures, submits and waits for the "wordCount" MapReduce job.
 */
@Slf4j
public class WordCountJob {

    /** Fewest arguments {@link #main} itself needs: it reads {@code args[1]} and {@code args[2]}. */
    private static final int MIN_ARGS = 3;

    /**
     * Configures the job with {@link WordCountMapper} and {@link WordCountReducer}, removes the
     * output path if it already exists, runs the job, and terminates the JVM with exit code 0 on
     * success or 1 on failure.
     *
     * @param args 0|profile;1|input;2|output;3|master-ip;4|operator;5|homeDir
     * @throws IllegalArgumentException if the input or output path argument is missing
     * @throws Exception if configuring or running the job fails
     */
    public static void main(String[] args) throws Exception {
        // JobUtil.init receives the raw arguments first so that any handling it does is unchanged.
        Configuration config = JobUtil.init(args);

        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException further down.
        if (args == null || args.length < MIN_ARGS) {
            throw new IllegalArgumentException(
                    "expected arguments: profile input output [master-ip operator homeDir], but got "
                            + (args == null ? 0 : args.length));
        }

        Job job = Job.getInstance(config);
        job.setJarByClass(WordCountJob.class);
        job.setJobName("wordCount");

        Path inputPath = new Path(args[1]);
        FileInputFormat.addInputPath(job, inputPath);

        // Delete a pre-existing output path (recursively) before registering it as the job output.
        Path outputPath = new Path(args[2]);
        if (outputPath.getFileSystem(config).exists(outputPath)) {
            outputPath.getFileSystem(config).delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(WordCountMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setNumReduceTasks(1);
        job.setReducerClass(WordCountReducer.class);

        boolean isSuccess = job.waitForCompletion(true);
        log.info("isSuccess:{}", isSuccess);
        System.exit(isSuccess ? 0 : 1);
    }
}
/**
 * Mapper of the word-count job: splits each input value into tokens and emits
 * {@code (token, 1)} for every token.
 */
@Slf4j
public class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {

    /** The count written for every single token. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key instance that is overwritten with each token before it is written. */
    private final Text word = new Text();

    /**
     * Tokenizes {@code value} with {@link StringTokenizer}'s default delimiters and writes one
     * {@code (token, 1)} pair per token to {@code context}.
     *
     * @param key     the input key; only logged
     * @param value   the text to tokenize
     * @param context the context the pairs are written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Parameterized messages: the text is only built when INFO is actually enabled.
        // NOTE(review): INFO logging per record and per token is costly on large inputs.
        log.info("key:{},value:{}", key, value);

        StringTokenizer tokens = new StringTokenizer(value.toString());
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            log.info("word:{}", token);
            word.set(token);
            context.write(word, ONE);
        }
    }
}
/**
 * Reducer of the word-count job: adds up all values received for a key and emits
 * {@code (key, sum)}.
 */
@Slf4j
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /** Output value instance that is overwritten with the sum before it is written. */
    private final IntWritable result = new IntWritable();

    /**
     * Sums every value in {@code values} and writes the total for {@code key} to {@code context}.
     *
     * @param key     the key whose values are summed
     * @param values  the values to add up
     * @param context the context the {@code (key, sum)} pair is written to
     * @throws ArithmeticException  if the sum does not fit into an {@code int}
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Parameterized messages: the text is only built when INFO is actually enabled.
        // NOTE(review): INFO logging per value is costly for keys with many values.
        log.info("key:{}", key);

        int sum = 0;
        for (IntWritable value : values) {
            log.info("value:{}", value);
            // addExact fails loudly on overflow instead of silently wrapping to a wrong count.
            sum = Math.addExact(sum, value.get());
        }

        result.set(sum);
        context.write(key, result);
    }
}