1)理解【倒排索引】功能
2)熟悉 MapReduce 中的 Combiner 功能
3)依据需求编码实现【倒排索引】功能,旨在加深对 MapReduce 的理解。
数据:
结果:
代码:
package com.hyhc.mr;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
publicclass InvertedIndexMapReduce extends Configured implements Tool{
//url:key1->10
//url:key2->12
//url1:key2->3
publicstaticclass IndexMapper extends
Mapper<LongWritable,Text,Text,Text>{
private Text mapOutputKey = new Text();
private Text mapOutputValue = new Text("1");
@Override
publicvoid map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String lineValue=value.toString();
String strs[]=lineValue.split("##");
String url = strs[0] ;
String title = strs[1] ;
String content = strs[2] ;
String[] tstrs = title.split(" ") ;
for(String ts : tstrs){
mapOutputKey.set(ts+","+url);
context.write(mapOutputKey, mapOutputValue);
}
String[] cstrs = content.split(" ") ;
for(String cs : cstrs){
mapOutputKey.set(cs+","+url);
context.write(mapOutputKey, mapOutputValue);
}
}
}
publicstaticclass IndexCombiner extends
Reducer<Text,Text,Text,Text>{
private Text combinerOutoutKey = new Text() ;
private Text combinerOutputValue = new Text();
@Override
publicvoid reduce(Text key, Iterable<Text> values,
Context context)
throws IOException, InterruptedException {
String keys[]=key.toString().split(",");
combinerOutoutKey.set(keys[0]);
intsum = 0 ;
for(Text value : values){
sum += Integer.parseInt(value.toString());
}
combinerOutputValue.set(keys[1]+"->"+sum);
context.write(combinerOutoutKey, combinerOutputValue);
}
}
publicstaticclass IndexReducer extends
Reducer<Text,Text,Text,Text>{
private Text outputKey = new Text() ;
private Text splitline = new Text("----------------");
private Text splitline1 = new Text("----------------------------------------");
@Override
publicvoid reduce(Text key, Iterable<Text> values,
Context context)
throws IOException, InterruptedException {
outputKey.set("key:"+key);
context.write(outputKey, null);
context.write(splitline, null);
for(Text value : values){
context.write(null, value);
}
context.write(splitline1, null);
}
}
publicint run(String[] args) throws Exception {
Configuration configuration = super.getConf() ;
Job job = Job.getInstance(
configuration,
this.getClass().getSimpleName()
);
job.setJarByClass(this.getClass());
Path inPath = new Path(args[0]) ;
FileInputFormat.addInputPath(job, inPath);
job.setMapperClass(IndexMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setCombinerClass(IndexCombiner.class);
job.setReducerClass(IndexReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
Path outPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outPath);
booleanisSuccess = job.waitForCompletion(true);
returnisSuccess ? 0 : 1 ;
}
publicstaticvoid main(String[] args) throws Exception {
Configuration configuration = new Configuration();
intstatus = ToolRunner.run(//
configuration, //
new InvertedIndexMapReduce(), //
args
) ;
System.exit(status);
}
}