Recommender system - calculating user FoF relevance (Hadoop implementation)

Scenario: Whether it is QQ, or Weibo, Toutiao and other platforms with social attributes, in order to stick to users, they often recommend friends to users. Such friends are generally obtained from friends who have more interest in themselves or their friends. For example, "people you may know" in QQ.

FoF relationship: for any user A, let B be the set of A's friends. Any two users in B who are not themselves friends form an FoF (friend-of-a-friend) relationship.

The recommendation coefficient needs to be sorted, and the sorting is based on the number of fof relationships of the entire user group. The more the same fof relationship, the greater the correlation between the two users, and the higher the recommendation coefficient.

data:
enter description here

Define a fof relationship class

package com.sound.mr.friend;

import org.apache.hadoop.io.Text;

/**
 * Map key representing an unordered pair of user names.
 *
 * <p>The two names are stored in canonical (lexicographic) order so that
 * the pair "Ming-Hong" and "Hong-Ming" collapse into one identical key,
 * which lets MapReduce group both directions of the relationship together.
 */
public class Fof extends Text {

    public Fof() {
        super();
    }

    public Fof(String a, String b) {
        super(canonical(a, b));
    }

    /**
     * Returns the pair joined with a tab, smaller name first, so that the
     * same two users always produce the same key regardless of argument order.
     */
    private static String canonical(String a, String b) {
        return a.compareTo(b) < 0 ? a + "\t" + b : b + "\t" + a;
    }
}
  • main function
package com.sound.mr.friend;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;


/**
 * Driver for a two-stage MapReduce pipeline that computes "friend of a
 * friend" (FoF) recommendations.
 *
 * <p>Stage 1 ({@link #runFind}) scans each user's friend list, emits every
 * candidate FoF pair, discards pairs that are already direct friends, and
 * counts the number of common friends for the rest. Stage 2
 * ({@link #runSort}) re-sorts those pairs by common-friend count in
 * descending order so the strongest recommendations come first.
 */
public class MainJob {

    public static void main(String[] args) {
        Configuration config = new Configuration();
        // Local debugging against the cluster.
        config.set("fs.defaultFS", "hdfs://node1:8020");
        config.set("yarn.resourcemanager.hostname", "node1");

        // Uncomment to submit a pre-built jar to the cluster instead:
        // config.set("mapred.jar", "C:\\Users\\53033\\Desktop\\wc.jar");

        // Two jobs: the first discovers all FoF pairs and their counts,
        // the second sorts them. The sort only runs if discovery succeeded.
        if (runFind(config)) {
            runSort(config);
        }
    }

    /**
     * Stage 2: sorts the FoF pairs produced by {@link #runFind} by their
     * common-friend count (descending). Reads from /usr/output/friend and
     * writes to /usr/output/friendSort, deleting any previous output first.
     */
    private static void runSort(Configuration config) {
        try {
            Job job = Job.getInstance(config);
            job.setJarByClass(MainJob.class);
            job.setJobName("sort fof of friends");

            job.setMapOutputKeyClass(Fofer.class);
            job.setMapOutputValueClass(NullWritable.class);

            job.setMapperClass(SortMapper.class);
            job.setReducerClass(SortReducer.class);

            // Custom shuffle comparator: count descending, then pair name.
            job.setSortComparatorClass(MySort.class);

            job.setInputFormatClass(KeyValueTextInputFormat.class);

            FileSystem fs = FileSystem.get(config);
            FileInputFormat.addInputPath(job, new Path("/usr/output/friend"));

            Path outPath = new Path("/usr/output/friendSort");
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
            FileOutputFormat.setOutputPath(job, outPath);

            boolean finished = job.waitForCompletion(true);
            if (finished) {
                System.out.println("finished success!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Stage 1: finds every FoF pair and how many common friends connect it.
     * Reads friend lists from /usr/input/ (created if absent) and writes
     * "userA\tuserB\tcount" lines to /usr/output/friend.
     *
     * @return true if the job completed successfully, false otherwise
     */
    public static boolean runFind(Configuration config) {
        boolean finished = false;
        try {
            Job job = Job.getInstance(config);
            job.setJarByClass(MainJob.class);
            job.setJobName("find fof of friends");

            job.setMapOutputKeyClass(Fof.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setMapperClass(FofMapper.class);
            job.setReducerClass(FofReducer.class);

            job.setInputFormatClass(KeyValueTextInputFormat.class);

            FileSystem fs = FileSystem.get(config);
            Path inPath = new Path("/usr/input/");
            if (!fs.exists(inPath)) {
                fs.mkdirs(inPath);
            }
            FileInputFormat.addInputPath(job, inPath);

            Path outPath = new Path("/usr/output/friend");
            if (fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
            FileOutputFormat.setOutputPath(job, outPath);

            finished = job.waitForCompletion(true);
            if (finished) {
                System.out.println("finished success!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return finished;
    }

    /**
     * Stage-1 mapper. Input: key = user name, value = tab-separated friend
     * list. Emits (user, friend) pairs with value 0 (a direct friendship)
     * and every (friend_i, friend_j) pair with value 1 (a candidate FoF).
     */
    static class FofMapper extends Mapper<Text, Text, Fof, IntWritable> {

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            String user = key.toString();
            String[] friends = StringUtils.split(value.toString(), '\t');
            for (int i = 0; i < friends.length; i++) {
                // Direct friendship: marked with 0 so the reducer can veto it.
                Fof direct = new Fof(user, friends[i]);
                context.write(direct, new IntWritable(0));
                for (int j = i + 1; j < friends.length; j++) {
                    // Candidate FoF pair (may still turn out to be friends).
                    Fof fof = new Fof(friends[i], friends[j]);
                    context.write(fof, new IntWritable(1));
                }
            }
        }
    }

    /**
     * Stage-1 reducer. A value of 0 means the pair is already a direct
     * friendship, so the pair is suppressed; otherwise the 1s are summed
     * into the number of common friends and emitted.
     */
    static class FofReducer extends Reducer<Fof, IntWritable, Fof, IntWritable> {

        @Override
        protected void reduce(Fof arg0, Iterable<IntWritable> arg1,
                Context arg2) throws IOException, InterruptedException {
            int sum = 0;
            boolean isFriend = false;
            for (IntWritable iw : arg1) {
                if (iw.get() == 0) {
                    // Already direct friends: no need to keep counting.
                    isFriend = true;
                    break;
                } else {
                    sum += iw.get();
                }
            }
            if (!isFriend) {
                arg2.write(arg0, new IntWritable(sum));
            }
        }
    }

    /**
     * Stage-2 mapper. Stage 1 writes "A\tB\tcount"; KeyValueTextInputFormat
     * splits at the FIRST tab, so key = "A" and value = "B\tcount". This
     * mapper reassembles the pair and wraps it with its count in a Fofer
     * so the shuffle can sort by count.
     */
    static class SortMapper extends Mapper<Text, Text, Fofer, NullWritable> {

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fofs = StringUtils.split(value.toString(), '\t');
            int num = Integer.parseInt(fofs[1]);
            Fofer fofer = new Fofer();
            fofer.setFof(key.toString() + '\t' + fofs[0]);
            fofer.setNum(num);
            context.write(fofer, NullWritable.get());
        }
    }

    /**
     * Stage-2 reducer. Keys arrive already sorted by MySort, so each one is
     * written straight through as "A\tB\tcount".
     */
    static class SortReducer extends Reducer<Fofer, NullWritable, Text, NullWritable> {

        @Override
        protected void reduce(Fofer arg0, Iterable<NullWritable> arg1,
                Context arg2) throws IOException, InterruptedException {
            // Bug fix: the original concatenated pair and count with no
            // separator ("A\tB3"); a tab keeps the output parseable.
            arg2.write(new Text(arg0.getFof() + "\t" + arg0.getNum()),
                    NullWritable.get());
        }
    }
}
  • The second MapReduce stage needs a sort: the key class and comparator are shown below (see the previous article for background)
package com.sound.mr.friend;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * @author 53033
 * 用于排序
 */
/**
 * Composite sort key for the second job: an FoF pair string ("a\tb") plus
 * its common-friend count.
 *
 * <p>Ordering is by count descending. Ties are broken on the pair string
 * (also reversed) — without this tie-break, keys with equal counts would
 * compare as equal and the reducer would collapse them into a single group,
 * keeping only one pair.
 */
public class Fofer implements WritableComparable<Fofer> {

    // Canonical "smaller\tlarger" pair produced by stage 1.
    private String fof;
    // Number of common friends shared by the pair.
    private int num;

    public String getFof() {
        return fof;
    }

    public void setFof(String fof) {
        this.fof = fof;
    }

    public int getNum() {
        return num;
    }

    public void setNum(int num) {
        this.num = num;
    }

    @Override
    public int compareTo(Fofer o) {
        // Operands swapped => descending by count.
        int byCount = Integer.compare(o.getNum(), this.getNum());
        if (byCount != 0) {
            return byCount;
        }
        // Tie-break on the pair string so equal counts stay distinct keys.
        return -this.getFof().compareTo(o.getFof());
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(fof);
        out.writeInt(num);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        fof = in.readUTF();
        num = in.readInt();
    }
}



package com.sound.mr.friend;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * 
 * @author 53033
 * 排序规则
 */
/**
 * Shuffle-phase comparator for the second job.
 *
 * <p>Delegates entirely to {@link Fofer#compareTo}, so keys are ordered by
 * common-friend count descending with a pair-name tie-break.
 */
public class MySort extends WritableComparator {

    /** Registers Fofer as the key type; true = instantiate keys for comparison. */
    public MySort() {
        super(Fofer.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        return ((Fofer) a).compareTo((Fofer) b);
    }
}
  • result
    enter description here

Error record:
* The project is clean
* eclipse has no DFS folder
enter description here
* visit http://node1:50070 — one of the expected Live Nodes is missing
enter description here
* Reason: storage id is wrong, inconsistent
* Solution: locate the datanode's data directory path, remove its "current" folder, then restart the datanode: hadoop-daemon.sh start datanode
* Note: Please find the wrong node; in addition, the data folder is the directory where the datanode is located, do not find the wrong one

Reference link:
https://my.oschina.net/u/3264690/blog/1377199

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326399035&siteId=291194637