十二、用MapReduce完成类似倒排索引的功能



1)理解【倒排索引】功能
2)熟悉 MapReduce 中的 Combiner 功能
3)依据需求编码实现【倒排索引】功能,旨在加深对 MapReduce 的理解。
数据:


结果:

代码:

package com.hyhc.mr;
 
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
 
 
publicclass InvertedIndexMapReduce extends Configured implements Tool{
//url:key1->10
//url:key2->12
//url1:key2->3
    publicstaticclass IndexMapper extends
       Mapper<LongWritable,Text,Text,Text>{
       private Text mapOutputKey = new Text();
       private Text mapOutputValue = new Text("1");
       @Override
       publicvoid map(LongWritable key, Text value, Context context)
              throws IOException, InterruptedException {
           String lineValue=value.toString();
           String strs[]=lineValue.split("##");
           String url = strs[0] ;
           String title = strs[1] ;
           String content = strs[2] ;
           String[] tstrs = title.split(" ") ;
           for(String ts : tstrs){
              mapOutputKey.set(ts+","+url);
              context.write(mapOutputKey, mapOutputValue);
           }
           String[] cstrs = content.split(" ") ;
           for(String cs : cstrs){
              mapOutputKey.set(cs+","+url);
              context.write(mapOutputKey, mapOutputValue);
           }
        }
    }
   
    publicstaticclass  IndexCombiner extends
    Reducer<Text,Text,Text,Text>{
    private Text combinerOutoutKey = new Text() ;
    private Text combinerOutputValue = new Text();
   
    @Override
    publicvoid reduce(Text key, Iterable<Text> values,
           Context context)
           throws IOException, InterruptedException {
      
       String keys[]=key.toString().split(",");
       combinerOutoutKey.set(keys[0]);
       intsum = 0 ;
       for(Text value : values){
           sum += Integer.parseInt(value.toString());
       }
       combinerOutputValue.set(keys[1]+"->"+sum);
       context.write(combinerOutoutKey, combinerOutputValue);
    }
}
    publicstaticclass IndexReducer extends
       Reducer<Text,Text,Text,Text>{
       private Text outputKey = new Text() ;
       private Text splitline = new Text("----------------");
       private Text splitline1 = new Text("----------------------------------------");
       @Override
       publicvoid reduce(Text key, Iterable<Text> values,
              Context context)
              throws IOException, InterruptedException {
           outputKey.set("key:"+key);
           context.write(outputKey, null);
           context.write(splitline, null);
           for(Text value : values){
              context.write(null, value);
           }
           context.write(splitline1, null);
       }
    }
    publicint run(String[] args) throws Exception {
       Configuration configuration = super.getConf() ;
       Job job = Job.getInstance(
           configuration,
           this.getClass().getSimpleName()
       );
       job.setJarByClass(this.getClass());
       Path inPath = new Path(args[0]) ;
       FileInputFormat.addInputPath(job, inPath);
       job.setMapperClass(IndexMapper.class);
       job.setMapOutputKeyClass(Text.class);
       job.setMapOutputValueClass(Text.class);
       job.setCombinerClass(IndexCombiner.class);
       job.setReducerClass(IndexReducer.class);
       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(Text.class);
       Path outPath = new Path(args[1]);
       FileOutputFormat.setOutputPath(job, outPath);
       booleanisSuccess = job.waitForCompletion(true);
       returnisSuccess ? 0 : 1 ;
    }
   
    publicstaticvoid main(String[] args) throws Exception {
       Configuration configuration = new Configuration();
       intstatus = ToolRunner.run(//
           configuration, //
           new InvertedIndexMapReduce(), //
           args
       ) ;
       System.exit(status);
    }
}
 

猜你喜欢

转载自blog.csdn.net/zipo/article/details/54948010