// MapReduce implementation of mutual-friend ("people you may know") recommendation

package com.qianfeng.friendrecommendation;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class ShareFriend {
	/**
	 * First-stage mapper.
	 * Input line format: {@code person:friendA,friendB,...}
	 * Inverts the relation: for every friend in the list, emits
	 * {@code <friend, person>}, so the reducer can collect all people
	 * who have that friend.
	 */
	static class ShareFriendMapper extends Mapper<LongWritable, Text, Text, Text>{

		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// value: one line of the input file
			String line = value.toString();
			int sep = line.indexOf(':');
			// Skip blank/malformed lines instead of throwing ArrayIndexOutOfBoundsException
			if (sep < 0 || sep == line.length() - 1) {
				return;
			}
			String person = line.substring(0, sep);
			String[] friends = line.substring(sep + 1).split(",");

			for (String friend : friends) {
				if (friend.isEmpty()) {
					continue;
				}
				// context carries the map output downstream to the shuffle/reduce phase
				context.write(new Text(friend), new Text(person));
			}
		}
	}
	/**
	 * Second-stage mapper. Consumes the first job's output, one line of
	 * {@code friend<TAB>personA,personB,...} at a time, and:
	 * <ol>
	 *   <li>sorts the people who share this friend, so each pair is always
	 *       emitted in canonical order (prevents both B-C and C-B appearing);</li>
	 *   <li>emits every pairwise combination as the key with the shared friend
	 *       as the value, i.e. {@code <personX-personY, friend>}.</li>
	 * </ol>
	 */
	static class ShareFriendMapper2 extends Mapper<LongWritable, Text, Text, Text>{

		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			String line = value.toString();
			String[] friend_persons = line.split("\t");
			// Skip malformed lines (no tab / no person list) instead of crashing
			if (friend_persons.length < 2) {
				return;
			}
			String friend = friend_persons[0];
			String[] persons = friend_persons[1].split(",");
			Arrays.sort(persons); // canonical pair ordering

			// Emit every unordered pair of people who share this friend
			for (int i = 0; i < persons.length - 1; i++) {
				for (int j = i + 1; j < persons.length; j++) {
					context.write(new Text(persons[i] + "-" + persons[j] + ":"), new Text(friend));
				}
			}
		}

	}
	/**
	 * Second-stage reducer. Receives {@code <personX-personY, list(shared friend)>},
	 * deduplicates the shared friends and joins them with commas, producing the
	 * final output {@code <personX-personY, all shared friends>}.
	 */
	static class ShareFriendReducer2 extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			// Set.add already ignores duplicates — no contains() pre-check needed
			Set<String> uniqueFriends = new HashSet<String>();
			for (Text friend : values) {
				uniqueFriends.add(friend.toString());
			}
			// Defensive: avoid StringIndexOutOfBoundsException from trimming an
			// empty buffer if the value list were ever empty
			if (uniqueFriends.isEmpty()) {
				return;
			}
			context.write(key, new Text(String.join(",", uniqueFriends)));
		}

	}
	/**
	 * First-stage reducer. The key is a single friend; the values are all the
	 * people who list that friend. Joins them into a comma-separated string,
	 * e.g. {@code A -> C,F,B,D} ("A is a friend of C, F, B and D").
	 */
	static class ShareFriendReducer extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
				throws IOException, InterruptedException {
			StringBuilder sb = new StringBuilder(); // no shared state — no need for synchronized StringBuffer
			for (Text friend : values) {
				// Separator-first join avoids the trailing-comma deleteCharAt,
				// which would throw on an empty value list
				if (sb.length() > 0) {
					sb.append(',');
				}
				sb.append(friend.toString());
			}
			context.write(key, new Text(sb.toString()));
		}
	}
	/**
	 * Chains the two MapReduce jobs:
	 * stage 1 inverts person→friends into friend→people,
	 * stage 2 pairs up people per friend and aggregates their shared friends.
	 * Paths may be overridden on the command line:
	 * {@code <input> <stage1-output> <stage2-output>}; otherwise the original
	 * hard-coded defaults are used.
	 */
	public static void main(String[] args) {
//		System.setProperty("hadoop.home.dir", "E:\\hadoop-common-2.2.0-bin-master");
		Configuration config = new Configuration();
		org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ShareFriend.class);
		String inputPath  = args.length > 0 ? args[0] : "E:/qianfeng/bigData/mapreduce查找共同好友/input/input.txt";
		String stage1Path = args.length > 1 ? args[1] : "E:/qianfeng/bigData/mapreduce查找共同好友/output/output1.txt";
		String stage2Path = args.length > 2 ? args[2] : "E:/qianfeng/bigData/mapreduce查找共同好友/output/output2.txt";
		try {
			// Stage 1: friend -> comma-joined list of people who have that friend
			Job job = Job.getInstance(config);
			job.setJarByClass(ShareFriend.class);
			job.setMapperClass(ShareFriendMapper.class);
			job.setReducerClass(ShareFriendReducer.class);
			job.setOutputKeyClass(Text.class);
			job.setOutputFormatClass(TextOutputFormat.class);
			job.setOutputValueClass(Text.class);
			FileInputFormat.setInputPaths(job, new Path(inputPath));
			FileOutputFormat.setOutputPath(job, new Path(stage1Path));
			boolean res = job.waitForCompletion(true);
			logger.info("stage 1 completed, success=" + res);
			if (!res) {
				// Stage 2 reads stage 1's output — don't run it on a failed stage 1
				System.exit(1);
			}

			// Stage 2: person-pair -> comma-joined shared friends
			Job job2 = Job.getInstance(config);
			job2.setJarByClass(ShareFriend.class);
			job2.setMapperClass(ShareFriendMapper2.class);
			job2.setReducerClass(ShareFriendReducer2.class);

			job2.setOutputKeyClass(Text.class);
			job2.setOutputValueClass(Text.class);

			FileInputFormat.setInputPaths(job2, new Path(stage1Path));
			FileOutputFormat.setOutputPath(job2, new Path(stage2Path));

			boolean res2 = job2.waitForCompletion(true);

			System.exit(res2 ? 0 : 1);
		} catch (IOException | ClassNotFoundException e) {
			logger.error("job failed", e); // log with full stack trace instead of printStackTrace
			System.exit(1);
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt(); // restore interrupt status
			logger.error("job interrupted", e);
			System.exit(1);
		}
	}
}

// Adapted from: blog.csdn.net/weixin_40903057/article/details/89469388