# Programming Collective Intelligence, Chapter 2

#!/usr/bin/python
#-*-coding:utf-8 -*-
from math import sqrt
# Preference data used by the similarity examples in this file.
# Maps critic name -> {movie title -> rating}. Not every critic rated every
# movie. The title strings (including their spacing) are dictionary keys and
# must be kept exactly as written.
critics = {
    "Lisa Rose": {
        "Lady in the Water": 2.5,
        "Snake on  a plane ": 3.5,
        "Juse My luck ": 3.0,
        "Superman Returns ": 3.5,
        "You,me and Durpee ": 2.5,
        "The Night Listener": 3.0,
    },
    "Gene Seymour": {
        "Lady in the Water": 3.0,
        "Snake on  a plane ": 3.5,
        "Juse My luck ": 1.5,
        "Superman Returns ": 5.0,
        "You,me and Durpee ": 3.5,
        "The Night Listener": 3.0,
    },
    "Michael Phillips": {
        "Lady in the Water": 2.5,
        "Snake on  a plane ": 3.0,
        "Superman Returns ": 3.5,
        "The Night Listener": 4.0,
    },
    "Claudia Puig": {
        "Snake on  a plane ": 3.5,
        "Juse My luck ": 3.0,
        "Superman Returns ": 4.0,
        "You,me and Durpee ": 2.5,
        "The Night Listener": 4.5,
    },
    "Mick LaSalle": {
        "Lady in the Water": 3.0,
        "Snake on  a plane ": 4.0,
        "Juse My luck ": 2.0,
        "Superman Returns ": 3.0,
        "You,me and Durpee ": 2.0,
    },
    "Jack Mattews": {
        "Lady in the Water": 3.0,
        "Snake on  a plane ": 4.0,
        "Juse My luck ": 3.0,
        "Superman Returns ": 5.0,
        "You,me and Durpee ": 3.5,
        "The Night Listener": 3.0,
    },
}

# Worked example of the Euclidean-distance similarity score for two rating
# pairs, (2.5 vs 3.0) and (3.5 vs 3.5): square each difference (pow(n, 2)),
# sum the squares, take the square root, then map the distance into (0, 1]
# with 1 / (1 + distance). Both differences must be squared; an exponent of 0
# would turn the second term into 1 regardless of the ratings.
# Here the distance is 0.5, so s == 2/3.
s=1/(1+sqrt(pow(2.5-3.0,2)+pow(3.5-3.5,2)))

# Similarity implementations: Euclidean distance (sim_distance) and Pearson correlation (sim_person)
# 具体的实现
def sim_distance(per,person1,person2):
    """Return a Euclidean-distance-based similarity score for two people.

    Args:
        per: Mapping of person name -> {item -> numeric rating}.
        person1: Key of the first person in ``per``.
        person2: Key of the second person in ``per``.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the two
        people's ratings over the items both have rated. The result is 1 when
        the shared ratings are identical and approaches 0 as they diverge.
        Returns 0 when the two people have no rated item in common.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``per``.
    """
    # Items rated by both people (iterated in person2's order).
    shared = [item for item in per[person2] if item in per[person1]]

    # Nothing in common: there is no evidence of similarity. Without this
    # guard the empty sum below would produce a distance of 0 and therefore
    # the maximum score of 1.
    if not shared:
        return 0

    # Sum the squared per-item differences, then take the square root to get
    # the distance.
    sum_of_squares = sum(
        pow(per[person1][item] - per[person2][item], 2) for item in shared
    )
    return 1/(1+sqrt(sum_of_squares))

if __name__=="__main__":
    # Demo: print Lisa Rose's Euclidean similarity score against herself and
    # against two other critics, in this order.
    for other in ("Lisa Rose","Gene Seymour","Michael Phillips"):
        print(sim_distance(critics,"Lisa Rose",other))
 
 
 
 
# Pearson-correlation similarity between two people (p1, p2); prefs is the
# data source.

def sim_person(prefs,p1,p2):
    """Return the Pearson correlation coefficient between two people's ratings.

    Only items rated by both people are taken into account.

    Args:
        prefs: Mapping of person name -> {item -> numeric rating}.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        The correlation ``r`` (nominally in [-1, 1]) of the shared ratings.
        Returns 0 when the two people share no rated item, or when the
        correlation is undefined because at least one of them gave the same
        rating to every shared item (zero variance).

    Raises:
        KeyError: If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    # Items both people have rated.
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    n = len(shared)

    # No overlap means no evidence of correlation; reporting 1 here would
    # rank complete strangers as perfect matches.
    if n == 0:
        return 0

    ratings1 = [prefs[p1][item] for item in shared]
    ratings2 = [prefs[p2][item] for item in shared]

    # Sums of the ratings.
    sum_p1 = sum(ratings1)
    sum_p2 = sum(ratings2)

    # Sums of the squared ratings.
    pow_p1 = sum(pow(r, 2) for r in ratings1)
    pow_p2 = sum(pow(r, 2) for r in ratings2)

    # Sum of the products of corresponding ratings.
    corresponding_p1p2 = sum(a * b for a, b in zip(ratings1, ratings2))

    # Pearson score: covariance term over the product of the spread terms.
    num = corresponding_p1p2 - (sum_p1 * sum_p2 / n)
    spread = (pow_p1 - pow(sum_p1, 2) / n) * (pow_p2 - pow(sum_p2, 2) / n)

    # spread == 0 means zero variance (r undefined). It can also come out
    # marginally negative through floating-point rounding, and raising a
    # negative float to the power 0.5 would yield a complex number.
    if spread <= 0:
        return 0

    return num / spread ** 0.5

# You may also like

# Reposted from blog.csdn.net/qq_18617299/article/details/78755807