[Hadoop Learning 13] MapReduce Case Study 5: ItemCF

Environment
  VM: VMware 10
  Linux: CentOS-6.5-x86_64
  Client: Xshell4
  FTP: Xftp4
  JDK 8
  hadoop-3.1.1


Recommender systems: the Collaborative Filtering (CF) algorithm
ItemCF is item-based collaborative filtering: item-to-item similarity is estimated from users' ratings of different items, and recommendations are made from that similarity. Put simply: recommend to the user items similar to the ones they liked before.

Multiplying the Co-occurrence Matrix by the User Preference Vector yields the Recommended Vector.
The co-occurrence matrix is built from statistics over the full data set:
  ·it captures the associations between items
  ·every item carries an association value against every other item (the item's feature vector)
The user preference vector records a user's scores for the items they have interacted with.
For any candidate item:
  ·multiply each entry of the user preference vector by that item's association value with the rated item
  ·sum the products to obtain the candidate item's entry in the recommendation vector
  ·sort and take the top N

From the historical order records,
compute, for every item, how many times it appears in the same order as each other item
  ·so: every item ends up with its own co-occurrence list against all other items
Users have performed concrete actions on some items (adding to cart, purchasing, and so on); from these actions we compute the user's preference score list for those items.
Using the scores in the user preference list:
  ·multiply each score, in turn, by the co-occurrence value between the scored item and the candidate item in the candidate's co-occurrence list
  ·the sum of these products is the candidate item's recommendation score (a minimal in-memory sketch of this computation follows)
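
Before walking through the six MapReduce jobs, here is a minimal in-memory sketch of the same scoring pipeline. All values in it (item IDs, co-occurrence counts, preference scores) are made up for illustration; the jobs below compute the same result at scale, roughly one stage per job.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ItemCFSketch {

    public static void main(String[] args) {
        // co-occurrence matrix: item -> (other item -> co-occurrence count)
        Map<String, Map<String, Integer>> cooc = new HashMap<String, Map<String, Integer>>();
        cooc.put("i100", row("i100", 3, "i105", 1, "i124", 2));
        cooc.put("i105", row("i100", 1, "i105", 2, "i124", 1));
        cooc.put("i124", row("i100", 2, "i105", 1, "i124", 4));

        // one user's preference vector: item -> accumulated action score
        Map<String, Integer> prefs = row("i100", 3, "i105", 1);

        // recommendation vector = co-occurrence matrix * preference vector
        Map<String, Double> rec = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> p : prefs.entrySet()) {
            Map<String, Integer> r = cooc.get(p.getKey());
            if (r == null) continue;
            for (Map.Entry<String, Integer> c : r.entrySet()) {
                Double old = rec.get(c.getKey());
                double part = c.getValue() * (double) p.getValue();// partial product (step 4)
                rec.put(c.getKey(), old == null ? part : old + part);// summation (step 5)
            }
        }

        // sort by score descending and take the top N (step 6); note that
        // already-rated items are not filtered out, matching the MR jobs
        List<Map.Entry<String, Double>> top = new ArrayList<Map.Entry<String, Double>>(rec.entrySet());
        Collections.sort(top, new Comparator<Map.Entry<String, Double>>() {
            public int compare(Map.Entry<String, Double> a, Map.Entry<String, Double> b) {
                return -Double.compare(a.getValue(), b.getValue());
            }
        });
        for (int n = 0; n < Math.min(10, top.size()); n++) {
            System.out.println(top.get(n).getKey() + "," + top.get(n).getValue());
        }
        // prints: i100,10.0  i124,7.0  i105,5.0
    }

    private static Map<String, Integer> row(Object... kv) {
        Map<String, Integer> m = new HashMap<String, Integer>();
        for (int i = 0; i < kv.length; i += 2) {
            m.put((String) kv[i], (Integer) kv[i + 1]);
        }
        return m;
    }
}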

package test.mr.itemcf;

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;

public class StartRun {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        
        conf.set("mapreduce.app-submission.corss-paltform", "true");
        conf.set("mapreduce.framework.name", "local");
        
        // input and output directories for all MR jobs are defined in this map
        Map<String, String> paths = new HashMap<String, String>();
        paths.put("Step1Input", "/data/itemcf/input/");
        paths.put("Step1Output", "/data/itemcf/output/step1");
        paths.put("Step2Input", paths.get("Step1Output"));
        paths.put("Step2Output", "/data/itemcf/output/step2");
        paths.put("Step3Input", paths.get("Step2Output"));
        paths.put("Step3Output", "/data/itemcf/output/step3");
        paths.put("Step4Input1", paths.get("Step2Output"));
        paths.put("Step4Input2", paths.get("Step3Output"));
        paths.put("Step4Output", "/data/itemcf/output/step4");
        paths.put("Step5Input", paths.get("Step4Output"));
        paths.put("Step5Output", "/data/itemcf/output/step5");
        paths.put("Step6Input", paths.get("Step5Output"));
        paths.put("Step6Output", "/data/itemcf/output/step6");

        Step1.run(conf, paths);
        Step2.run(conf, paths);
        Step3.run(conf, paths);
        Step4.run(conf, paths);
        Step5.run(conf, paths);
        Step6.run(conf, paths);
    }

    // action-to-score weights: turn a user behavior type into a preference score
    public static Map<String, Integer> R = new HashMap<String, Integer>();
    static {
        R.put("click", 1);
        R.put("collect", 2);
        R.put("cart", 3);
        R.put("alipay", 4);
    }
}
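
A note on running this: with mapreduce.framework.name set to "local" and an otherwise default Configuration, all six jobs run in-process and the /data/itemcf/... paths resolve against the local file system, which is convenient for testing. To submit to a cluster instead, drop the "local" setting, package the classes into a jar, and launch it with hadoop jar (e.g. hadoop jar itemcf.jar test.mr.itemcf.StartRun; the jar name here is only an example).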
package test.mr.itemcf;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step 1: deduplicate the raw action records
 * @author root
 *
 */
public class Step1 {

    
    public static boolean run(Configuration config,Map<String, String> paths){
        try {
            FileSystem fs =FileSystem.get(config);
            Job job =Job.getInstance(config);
            job.setJobName("step1");
            job.setJarByClass(Step1.class);
            job.setMapperClass(Step1_Mapper.class);
            job.setReducerClass(Step1_Reducer.class);
            
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(NullWritable.class);
            
            FileInputFormat.addInputPath(job, new Path(paths.get("Step1Input")));
            Path outpath=new Path(paths.get("Step1Output"));
            if(fs.exists(outpath)){
                fs.delete(outpath,true);
            }
            FileOutputFormat.setOutputPath(job, outpath);
            
            boolean f= job.waitForCompletion(true);
            return f;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
    
     static class Step1_Mapper extends Mapper<LongWritable, Text, Text, NullWritable>{

        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // skip the first line (byte offset 0, typically the CSV header);
            // emit each whole record as the key so duplicates collapse in reduce
            if(key.get()!=0){
                context.write(value, NullWritable.get());
            }
        }
    }
     
     static class Step1_Reducer extends Reducer<Text, NullWritable, Text, NullWritable>{

            // the input value type is NullWritable, matching the map output value class
            protected void reduce(Text key, Iterable<NullWritable> i, Context context)
                    throws IOException, InterruptedException {
                context.write(key,NullWritable.get());
            }
        }
}
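
For illustration (record values made up), if the raw input below the header contains a duplicated line:

    i161,u2625,click,2014/9/18 15:03
    i161,u2625,click,2014/9/18 15:03
    i161,u2625,alipay,2014/9/18 15:10

then Step 1 writes each distinct record exactly once, since identical records land in the same reduce group:

    i161,u2625,alipay,2014/9/18 15:10
    i161,u2625,click,2014/9/18 15:03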
package test.mr.itemcf;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step 2: group by user and sum action scores per item, producing the
 * user-to-item preference matrix, e.g.:
    u13    i160:1,
    u14    i25:1,i223:1,
    u16    i252:1,
    u21    i266:1,
    u24    i64:1,i218:1,i185:1,
    u26    i276:1,i201:1,i348:1,i321:1,i136:1,
 * @author root
 *
 */
public class Step2 {

    
    public static boolean run(Configuration config,Map<String, String> paths){
        try {
            FileSystem fs =FileSystem.get(config);
            Job job =Job.getInstance(config);
            job.setJobName("step2");
            job.setJarByClass(StartRun.class);
            job.setMapperClass(Step2_Mapper.class);
            job.setReducerClass(Step2_Reducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            
            FileInputFormat.addInputPath(job, new Path(paths.get("Step2Input")));
            Path outpath=new Path(paths.get("Step2Output"));
            if(fs.exists(outpath)){
                fs.delete(outpath,true);
            }
            FileOutputFormat.setOutputPath(job, outpath);
            
            boolean f= job.waitForCompletion(true);
            return f;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
    
     static class Step2_Mapper extends Mapper<LongWritable, Text, Text, Text>{

         // using user+item together as the output key would be more efficient
         // input line: i161,u2625,click,2014/9/18 15:03
        protected void map(LongWritable key, Text value,
                Context context)
                throws IOException, InterruptedException {
            String[]  tokens=value.toString().split(",");
            String item=tokens[0];
            String user=tokens[1];
            String action =tokens[2];
            Integer rv =StartRun.R.get(action);
            if(rv==null){// skip actions that have no configured weight
                return;
            }
            Text k= new Text(user);
            Text v =new Text(item+":"+ rv.intValue());
            context.write(k, v);
            // emits e.g.: u2625    i161:1
        }
    }
    
     
     static class Step2_Reducer extends Reducer<Text, Text, Text, Text>{

            protected void reduce(Text key, Iterable<Text> i,
                    Context context)
                    throws IOException, InterruptedException {
                // sum the scores of repeated item:score pairs for one user, e.g.
                // u2625 -> i161:1, i161:2, i161:4, i162:3, i161:4
                Map<String, Integer> r =new HashMap<String, Integer>();
                for(Text value :i){
                    String[] vs =value.toString().split(":");
                    String item=vs[0];
                    int score=Integer.parseInt(vs[1]);
                    Integer old=r.get(item);
                    r.put(item, old==null? score : old+score);
                }
                StringBuilder sb =new StringBuilder();
                for(Entry<String, Integer> entry :r.entrySet() ){
                    sb.append(entry.getKey()).append(":").append(entry.getValue()).append(",");
                }
                context.write(key,new Text(sb.toString()));
            }
        }
}
package test.mr.itemcf;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Step 3: count item pairs to build the item co-occurrence matrix, e.g.:
i100:i100    3
i100:i105    1
i100:i106    1
i100:i109    1
i100:i114    1
i100:i124    1
 * @author root
 *
 */
public class Step3 {
     private final static Text K = new Text();
     private final static IntWritable V = new IntWritable(1);// constant 1 emitted by the mapper
    
    public static boolean run(Configuration config,Map<String, String> paths){
        try {
            FileSystem fs =FileSystem.get(config);
            Job job =Job.getInstance(config);
            job.setJobName("step3");
            job.setJarByClass(StartRun.class);
            job.setMapperClass(Step3_Mapper.class);
            job.setReducerClass(Step3_Reducer.class);
            job.setCombinerClass(Step3_Reducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            
            
            
            FileInputFormat.addInputPath(job, new Path(paths.get("Step3Input")));
            Path outpath=new Path(paths.get("Step3Output"));
            if(fs.exists(outpath)){
                fs.delete(outpath,true);
            }
            FileOutputFormat.setOutputPath(job, outpath);
            
            boolean f= job.waitForCompletion(true);
            return f;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }
    
     static class Step3_Mapper extends Mapper<LongWritable, Text, Text, IntWritable>{

        protected void map(LongWritable key, Text value,
                Context context)
                throws IOException, InterruptedException {
            // input line: u3244    i469:1,i498:1,i154:1,i73:1,i162:1,
            // emit every ordered pair of items in this user's list with count 1
            String[]  tokens=value.toString().split("\t");
            String[] items =tokens[1].split(",");
            for (int i = 0; i < items.length; i++) {
                String itemA = items[i].split(":")[0];
                for (int j = 0; j < items.length; j++) {
                    String itemB = items[j].split(":")[0];
                    K.set(itemA+":"+itemB);
                    context.write(K, V);
                }
            }
        }
    }
    
     
     static class Step3_Reducer extends Reducer<Text, IntWritable, Text, IntWritable>{

            // reducer-local writable: this class also runs as the combiner inside
            // the map task, so mutating the shared mapper constant V would corrupt
            // the map output
            private final IntWritable SUM = new IntWritable();

            protected void reduce(Text key, Iterable<IntWritable> i,
                    Context context)
                    throws IOException, InterruptedException {
                int sum =0;
                for(IntWritable v :i ){
                    sum =sum+v.get();
                }
                SUM.set(sum);
                context.write(key, SUM);
            }
        }
     
}
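
A worked example: for the step 2 line u3244    i469:1,i498:1, the mapper emits the four ordered pairs i469:i469, i469:i498, i498:i469, i498:i498, each with count 1. Summed over all users by the reducer (and pre-aggregated by the combiner), these counts form the co-occurrence matrix; a diagonal entry such as i469:i469 counts how many users interacted with that item at all.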
package test.mr.itemcf;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Step 4: multiply the co-occurrence matrix by the preference matrix
 * @author root
 *
 */
public class Step4 {

    public static boolean run(Configuration config, Map<String, String> paths) {
        try {
            FileSystem fs = FileSystem.get(config);
            Job job = Job.getInstance(config);
            job.setJobName("step4");
            job.setJarByClass(StartRun.class);
            job.setMapperClass(Step4_Mapper.class);
            job.setReducerClass(Step4_Reducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            // FileInputFormat.addInputPath(job, new
            // Path(paths.get("Step4Input")));
            FileInputFormat.setInputPaths(job,
                    new Path[] { new Path(paths.get("Step4Input1")),
                            new Path(paths.get("Step4Input2")) });
            Path outpath = new Path(paths.get("Step4Output"));
            if (fs.exists(outpath)) {
                fs.delete(outpath, true);
            }
            FileOutputFormat.setOutputPath(job, outpath);

            boolean f = job.waitForCompletion(true);
            return f;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    static class Step4_Mapper extends Mapper<LongWritable, Text, Text, Text> {
        private String flag;// which input: "step3" = co-occurrence matrix, "step2" = preference matrix

        // called once per map task, before any map() calls
        protected void setup(Context context) throws IOException,
                InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            flag = split.getPath().getParent().getName();// the parent directory name identifies the data set

            System.out.println(flag + "**********************");
        }

        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] tokens = Pattern.compile("[\t,]").split(value.toString());
            if (flag.equals("step3")) {// co-occurrence matrix
                //i100:i125    1
                String[] v1 = tokens[0].split(":");
                String itemID1 = v1[0];
                String itemID2 = v1[1];
                String num = tokens[1];
                // both A:B 3 and B:A 3 exist in the step3 output
                Text k = new Text(itemID1);// key = first item, e.g. i100
                Text v = new Text("A:" + itemID2 + "," + num);// e.g. A:i109,1

                context.write(k, v);

            } else if (flag.equals("step2")) {// user-to-item preference matrix

                //u26    i276:1,i201:1,i348:1,i321:1,i136:1,
                String userID = tokens[0];
                for (int i = 1; i < tokens.length; i++) {
                    String[] vector = tokens[i].split(":");
                    String itemID = vector[0];// item id
                    String pref = vector[1];// preference score

                    Text k = new Text(itemID); // key = item, e.g. i100
                    Text v = new Text("B:" + userID + "," + pref); // e.g. B:u401,2

                    context.write(k, v);
                }
            }
        }
    }

    static class Step4_Reducer extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // for one item (the reduce key), collect both record types:
            // mapA: co-occurrence counts of every other item with this item
            //       (other item ID -> co-occurrence count)
            Map<String, Integer> mapA = new HashMap<String, Integer>();
            // mapB: every user's preference score for this item (user ID -> score)
            Map<String, Integer> mapB = new HashMap<String, Integer>();

            // the grouped values contain two kinds of records:
            // co-occurrence  A: b:2   c:4   d:8
            // preference     B: u1:18 u2:33 u3:22
            for (Text line : values) {
                String val = line.toString();
                if (val.startsWith("A:")) {// co-occurrence count
                    // A:i109,1
                    String[] kv = Pattern.compile("[\t,]").split(
                            val.substring(2));
                    try {
                        mapA.put(kv[0], Integer.parseInt(kv[1]));
                    } catch (Exception e) {
                        e.printStackTrace();
                    }

                } else if (val.startsWith("B:")) {// preference score
                    // B:u401,2
                    String[] kv = Pattern.compile("[\t,]").split(
                            val.substring(2));
                    try {
                        mapB.put(kv[0], Integer.parseInt(kv[1]));
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }

            double result = 0;
            Iterator<String> iter = mapA.keySet().iterator();// co-occurrence entries
            while (iter.hasNext()) {
                String mapk = iter.next();// itemID
                int num = mapA.get(mapk).intValue();// co-occurrence count with the key item
                Iterator<String> iterb = mapB.keySet().iterator();// preference entries
                while (iterb.hasNext()) {
                    String mapkb = iterb.next();// userID
                    int pref = mapB.get(mapkb).intValue();
                    result = num * pref;// one partial product of the matrix multiplication

                    Text k = new Text(mapkb);// key = user ID
                    Text v = new Text(mapk + "," + result);// partial score for item mapk; step 5 sums these
                    context.write(k, v);
                }
            }
        }
    }
}
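
A worked example with made-up numbers, matching the sketch at the top: for the reduce key i100, suppose mapA = {i105:1, i124:2} (items co-occurring with i100) and mapB = {u14:3} (u14's preference for i100). The reducer emits

    u14    i105,3.0
    u14    i124,6.0

i.e. u14's preference for i100 is propagated to the items that co-occur with i100, weighted by the co-occurrence counts. These are only partial products; step 5 sums them.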
package test.mr.itemcf;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * Step 5: sum the partial products from step 4 to obtain the result
 * (recommendation) matrix
 *
 * @author root
 *
 */
public class Step5 {
    private final static Text K = new Text();
    private final static Text V = new Text();

    public static boolean run(Configuration config, Map<String, String> paths) {
        try {
            FileSystem fs = FileSystem.get(config);
            Job job = Job.getInstance(config);
            job.setJobName("step5");
            job.setJarByClass(StartRun.class);
            job.setMapperClass(Step5_Mapper.class);
            job.setReducerClass(Step5_Reducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            FileInputFormat
                    .addInputPath(job, new Path(paths.get("Step5Input")));
            Path outpath = new Path(paths.get("Step5Output"));
            if (fs.exists(outpath)) {
                fs.delete(outpath, true);
            }
            FileOutputFormat.setOutputPath(job, outpath);

            boolean f = job.waitForCompletion(true);
            return f;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    static class Step5_Mapper extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * pass each record through unchanged, re-keyed by user
         */
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] tokens = Pattern.compile("[\t,]").split(value.toString());
            Text k = new Text(tokens[0]);// key = user
            Text v = new Text(tokens[1] + "," + tokens[2]);// item,partialScore
            context.write(k, v);
        }
    }

    static class Step5_Reducer extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            Map<String, Double> map = new HashMap<String, Double>();// item -> summed score

            // e.g. for user u3 the incoming values might be:
            // 101,11  101,12  101,8  102,12  102,32

            for (Text line : values) {// e.g. i9,4.0
                String[] tokens = line.toString().split(",");
                String itemID = tokens[0];
                Double score = Double.parseDouble(tokens[1]);

                if (map.containsKey(itemID)) {
                    map.put(itemID, map.get(itemID) + score);// summation step of the matrix multiplication
                } else {
                    map.put(itemID, score);
                }
            }

            Iterator<String> iter = map.keySet().iterator();
            while (iter.hasNext()) {
                String itemID = iter.next();
                double score = map.get(itemID);
                Text v = new Text(itemID + "," + score);
                context.write(key, v);
            }
        }

    }
}
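
Continuing the example: if the reduce key i105 also emitted u14    i124,1.0 (u14's preference 1 for i105 times the i105/i124 co-occurrence count 1), step 5 groups by user and sums the partial products per item, so u14's final score for i124 becomes 6.0 + 1.0 = 7.0.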
package test.mr.itemcf;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Step 6: sort by recommendation score in descending order and list the
 * top 10 recommended items for each user
 *
 * @author root
 *
 */
public class Step6 {
    private final static Text K = new Text();
    private final static Text V = new Text();

    public static boolean run(Configuration config, Map<String, String> paths) {
        try {
            FileSystem fs = FileSystem.get(config);
            Job job = Job.getInstance(config);
            job.setJobName("step6");
            job.setJarByClass(StartRun.class);
            job.setMapperClass(Step6_Mapper.class);
            job.setReducerClass(Step6_Reducer.class);
            job.setSortComparatorClass(NumSort.class);
            job.setGroupingComparatorClass(UserGroup.class);
            job.setMapOutputKeyClass(PairWritable.class);
            job.setMapOutputValueClass(Text.class);

            FileInputFormat
                    .addInputPath(job, new Path(paths.get("Step6Input")));
            Path outpath = new Path(paths.get("Step6Output"));
            if (fs.exists(outpath)) {
                fs.delete(outpath, true);
            }
            FileOutputFormat.setOutputPath(job, outpath);

            boolean f = job.waitForCompletion(true);
            return f;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

    static class Step6_Mapper extends Mapper<LongWritable, Text, PairWritable, Text> {

        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // input line: user \t item,score
            String[] tokens = Pattern.compile("[\t,]").split(value.toString());
            String u = tokens[0];
            String item = tokens[1];
            String num = tokens[2];
            // composite key (user, score) enables the secondary sort by score
            PairWritable k =new PairWritable();
            k.setUid(u);
            k.setNum(Double.parseDouble(num));
            V.set(item+":"+num);
            context.write(k, V);
        }
    }

    static class Step6_Reducer extends Reducer<PairWritable, Text, Text, Text> {
        protected void reduce(PairWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // values arrive sorted by score descending (NumSort) and grouped by
            // user only (UserGroup), so the first 10 are this user's top 10
            int i=0;
            StringBuffer sb =new StringBuffer();
            for(Text v :values){
                if(i==10)
                    break;
                sb.append(v.toString()+",");
                i++;
            }
            K.set(key.getUid());
            V.set(sb.toString());
            context.write(K, V);
        }

    }
    
    static class PairWritable implements WritableComparable<PairWritable>{

//        private String itemId;
        private String uid;
        private double num;
        public void write(DataOutput out) throws IOException {
            out.writeUTF(uid);
//            out.writeUTF(itemId);
            out.writeDouble(num);
        }

        public void readFields(DataInput in) throws IOException {
            this.uid=in.readUTF();
//            this.itemId=in.readUTF();
            this.num=in.readDouble();
        }

        // natural ordering (score ascending); the job actually sorts with
        // NumSort below, which orders scores descending
        public int compareTo(PairWritable o) {
            int r =this.uid.compareTo(o.getUid());
            if(r==0){
                return Double.compare(this.num, o.getNum());
            }
            return r;
        }

        public String getUid() {
            return uid;
        }

        public void setUid(String uid) {
            this.uid = uid;
        }

        public double getNum() {
            return num;
        }

        public void setNum(double num) {
            this.num = num;
        }
        
    }
    
    // sort comparator: user ascending, then score descending
    static class NumSort extends WritableComparator{
        public NumSort(){
            super(PairWritable.class,true);
        }
        
        public int compare(WritableComparable a, WritableComparable b) {
            PairWritable o1 =(PairWritable) a;
            PairWritable o2 =(PairWritable) b;
            
            int r =o1.getUid().compareTo(o2.getUid());
            if(r==0){
                return -Double.compare(o1.getNum(), o2.getNum());
            }
            return r;
        }
    }
    
    // grouping comparator: compares by user only, so one reduce() call sees
    // all of a user's items regardless of score
    static class UserGroup extends WritableComparator{
        public UserGroup(){
            super(PairWritable.class,true);
        }
        
        public int compare(WritableComparable a, WritableComparable b) {
            PairWritable o1 =(PairWritable) a;
            PairWritable o2 =(PairWritable) b;
            return o1.getUid().compareTo(o2.getUid());
        }
    }
}
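
With the running example, the step 6 output contains one line per user with up to 10 item:score entries sorted by score descending, e.g.:

    u14    i100:10.0,i124:7.0,i105:5.0,

The comparator pair is what makes this cheap: NumSort orders each user's records by score before the reducer runs, and UserGroup makes all of a user's records arrive in a single reduce() call, so taking the top 10 is just reading the first 10 values.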

Reposted from www.cnblogs.com/cac2020/p/10313071.html