购买过该商品的顾客还购买过哪些商品:给定一个商品,推荐购买过这个商品的用户经常一起购买的五件产品。
输出是键值对,键是商品,值是5个商品的列表
map1
//key=userid value=useriD购买过的产品
map (userID,item){
emit(userID,item);}
reduce1
reduce(userID,item[i1,i2,...in]){
emit(useriD,item)}
map2
map(userID,items[i1,i2,...in]){
for (Item item:items){
Map<Item,Integer> map= new HashMap<Item.Integer>();
for (Item j :items){map(i)=map(j)+1;}
}
emit(item,map);
}
reduce2
reduce(item, stripes[M1,M2,...Mn]){
Map<Item,Integer> final = new HashMap<Item,Integer>();
for(Map<Item,Integer> map:stripes){
for (all(k,v) in map){final(k)=final(k)+v;}}
emit(key, top(5,final))
}
推荐共同好友 当两个用户有共同好友时,将其互相推荐给对方
期望输出
<user> : F (M: [I1, I2, ...])
F是推荐给user的一个好友,M是共同好友个数,I1,I2...是共同好友ID
// key = person; value = friends, person's direct-friend list.
map(person, friends){
    // Mark each direct friend with mutual-friend id -1 so the reducer can
    // tell "already friends" apart from "candidate via a mutual friend".
    for (friend : friends){
        directFriend = Tuple2(friend, -1);
        emit(person, directFriend);
    }
    // Every unordered pair (friends[i], friends[j]) shares `person` as a
    // mutual friend — recommend each of the two to the other.
    for (int i = 0; i < friends.size(); i++){
        for (int j = i + 1; j < friends.size(); j++){
            possibleFriend1 = Tuple2(friends.get(j), person);
            emit(friends.get(i), possibleFriend1);
            possibleFriend2 = Tuple2(friends.get(i), person);
            emit(friends.get(j), possibleFriend2);
        }
    }
}
// key = person; value = candidate list List<Tuple2<userID, userID>> where the
// first userID is the candidate friend and the second is the mutual friend
// (-1 means the candidate is already a direct friend).
reduce(person, values){
    // toUser -> list of mutual friends; a null list marks "already a friend"
    // so later mutual-friend evidence for that user is discarded.
    Map<Long, List<Long>> mutualFriends = new HashMap<Long, List<Long>>();
    for (Tuple2<toUser, mutualFriend> t2 : values){
        Long toUser = t2.toUser;
        Long mutualFriend = t2.mutualFriend;
        boolean alreadyFriend = (mutualFriend == -1);
        if (mutualFriends.containsKey(toUser)){
            if (alreadyFriend){
                mutualFriends.put(toUser, null);
            } else if (mutualFriends.get(toUser) != null){
                mutualFriends.get(toUser).add(mutualFriend);
            }
        } else {
            if (alreadyFriend){
                mutualFriends.put(toUser, null);
            } else {
                mutualFriends.put(toUser, new ArrayList<Long>(Arrays.asList(mutualFriend)));
            }
        }
    }
    // The original dropped the output — emit the non-null candidates with
    // their mutual-friend counts and ids.
    emit(person, buildRecommendations(mutualFriends));
}
spark 实现
public class SparkFriendRecommendation{
public static void main(String[] args) throws Exception{
//处理输入参数
//创建spark上下文对象
JavaSparkContext ctx = new JavaSparkContext();
//读取hdfs创建RDD
JavaRDD<String> records = ctx.textfile("",1);
//实现map
JavaPairRDD<Long,Tuple2<Long,Long>> pairs = records.flatMapToPair(new PairFlatMapFunction<
String, Long,Tuple2<Long,Long>>(){
public Iterable<Tuple2<Long,Tuple2<Long,Long>>>call(String s){
String[] tokens=s.split(",");
Long person = Long.parseLong(tokens[0]);
String friendsAsString = tokens[1];
String[] friendstokenized=friendsAsString.split(" ");
List<Long> friends =new ArrayList<Long>();
List<Tuple2<Long,Tuple2<Long,Long>>> mapperoutput=new ArrayList<Tuple2<Long,Tuple2<Long,Long>>>();
for (String friendasstring :friendstokenized){
long touser = Long.parseLong(friendasstring);
friends.add(touser);
Tuple2<Long,Long> directfirend=new Tuple2<Long,Long>(touser, -1);
mapperoutput.add(new Tuple2<Long, Tuple<Long,Long>>(person, directfirend));
}
for (int i =0;i<friends.size();i++){
for ( int j =i+1; j<frineds.size(), j++){
Tuple2<Long,Long> possibleFriend1=T2(friends.get(j),person);
mapperoutput.add(T2(firends.get(i),possibelFriend1));
Tuple2<Long,Long> possibleFriend2=T2(friends.get(i),person);
mapperoutput.add(T2(firends.get(j),possibelFriend1));
}
}
return mapperout;
}})
//实现reduce
JavaPairRDD<Long, Iterable<Tuple2<Long,Long>>> grouped=paires.groupByKey();
//生成最终结果
JavaPairRDD<Long,String>recommendations=grouped.mapValues(
new Function<Iterable<Tuple2<Long,Long>>,String>(){
public String call(Iterable<Tuple2<Long,Long>> values){
final Map<Long,List<Long>>mutualFriends= new HashMap<Long,List<Long>>();
for (Tuple2<Long,Long> t2: values){
final Long touser=t2._1;
final Long mutualFriend=t2._2;
final boolean alreadyFriend =(mutualFriend ==-1);
if (mutualFriends.contansKey(touser)){
if(alreadyFriend){mutualFriends.put(touser,null);}
else if (mutualFriends.get(touser)!=null){ mutualFriends.get(touser).add(mutualFirend);}
}
else{
if(alreadyFriend){mutualFriends.put(touser,null);}
else{List<Long> list1=new ArrayList<Long>(Array.asList(mutualFriend));
mutualFriend.put(touser, list1);}
}
}
}
return buildRecommendations(mutualFriends);
}
})
//
//
//
}
}