Python 实现:欧氏距离/余弦相似度/皮尔逊相似度

一. 欧氏距离

欧氏距离越小,两个向量的相似度越高。

import numpy as np


def EuclideanDistances(A, B):
    """Return the pairwise Euclidean distances between the rows of A and B.

    Uses the expansion ``||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b`` so the
    whole distance table is obtained from a single matrix product.

    Args:
        A: 2-D array-like of shape (n, d), one sample per row.
        B: 2-D array-like of shape (m, d), one sample per row.

    Returns:
        An ``np.matrix`` of shape (n, m) whose entry (i, j) is the distance
        between ``A[i]`` and ``B[j]``.
    """
    # Work on plain ndarrays: on an np.matrix, ``** 2`` is a matrix power
    # rather than an element-wise square, and lists have no ``.transpose``.
    A = np.asarray(A)
    B = np.asarray(B)

    vecProd = np.dot(A, B.T)

    # Squared row norms, shaped (n, 1) and (1, m) so that broadcasting builds
    # the (n, m) table without materialising tiled copies.
    sumSqA = np.sum(A ** 2, axis=1)[:, np.newaxis]
    sumSqB = np.sum(B ** 2, axis=1)[np.newaxis, :]

    SqED = sumSqB + sumSqA - 2 * vecProd
    # Floating-point cancellation can leave tiny negative values; clamp them
    # so the square root never produces NaN.
    SqED = np.maximum(SqED, 0)

    # The result is wrapped in np.matrix to keep the historical return type.
    return np.matrix(np.sqrt(SqED))

# Three 5-dimensional sample vectors, one per row (rows 0 and 2 are equal).
A = np.array([
    [0, 1, 0, 0, 1],
    [0, 0, 1, 1, 1],
    [0, 1, 0, 0, 1],
])

# Distances from every row of A to every row of A.
Euclidean_dis = EuclideanDistances(A, A)
print(Euclidean_dis)

# 欧几里得距离
from scipy.spatial.distance import cdist
# Same three sample vectors, one per row.
A = np.array([
    [0, 1, 0, 0, 1],
    [0, 0, 1, 1, 1],
    [0, 1, 0, 0, 1],
])

# Row-to-row Euclidean distances computed by SciPy, for comparison with the
# hand-written version.
dis = cdist(A, A, metric="euclidean")
print(dis)

二. 余弦相似度:

# 余弦相似度
def cosine_distance(matrix1, matrix2):
    """Return the pairwise cosine similarity between rows of two matrices.

    Despite the name, the value returned is the cosine *similarity*
    ``a.b / (||a|| * ||b||)``, not ``1 - similarity``.

    Args:
        matrix1: 2-D array-like of shape (n, d), one sample per row.
        matrix2: 2-D array-like of shape (m, d), one sample per row.

    Returns:
        An ndarray of shape (n, m) whose entry (i, j) is the cosine
        similarity of ``matrix1[i]`` and ``matrix2[j]``. Pairs involving an
        all-zero row get similarity 0 instead of NaN.
    """
    matrix1 = np.asarray(matrix1)
    matrix2 = np.asarray(matrix2)

    dot_products = np.dot(matrix1, matrix2.T)

    # Row norms shaped (n, 1) and (1, m); their product broadcasts to the
    # (n, m) table of norm products.
    norms1 = np.sqrt(np.sum(matrix1 * matrix1, axis=1))[:, np.newaxis]
    norms2 = np.sqrt(np.sum(matrix2 * matrix2, axis=1))[np.newaxis, :]
    norm_products = norms1 * norms2

    # Divide only where the denominator is non-zero; the remaining entries
    # keep their initial 0, avoiding 0/0 -> NaN and the RuntimeWarning.
    similarity = np.zeros(norm_products.shape, dtype=norm_products.dtype)
    np.divide(dot_products, norm_products, out=similarity,
              where=norm_products != 0)
    return similarity

# Three 5-dimensional sample vectors, one per row (rows 0 and 2 are equal).
A = np.array([
    [0, 1, 0, 0, 1],
    [0, 0, 1, 1, 1],
    [0, 1, 0, 0, 1],
])

# Cosine similarity of every row of A against every row of A.
cosine_dis = cosine_distance(A, A)
print(cosine_dis)

# 余弦相似度
from sklearn.metrics.pairwise import cosine_similarity

# Same three sample vectors, one per row.
A = np.array([
    [0, 1, 0, 0, 1],
    [0, 0, 1, 1, 1],
    [0, 1, 0, 0, 1],
])

# Row-to-row cosine similarity computed by scikit-learn, for comparison with
# the hand-written version.
cosine_dis2 = cosine_similarity(A, A)
print('cosine_dis2:\n', cosine_dis2)

三. 皮尔逊相似度

# 皮尔逊相似度  参考https://cloud.tencent.com/developer/ask/181044
import numpy as np
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine

# Three 5-dimensional sample vectors, one per row (rows 0 and 2 are equal).
A = np.array([
    [0, 1, 0, 0, 1],
    [0, 0, 1, 1, 1],
    [0, 1, 0, 0, 1],
])

# The "correlation" metric is the correlation distance 1 - r, where r is the
# Pearson correlation of two rows, so subtracting it from 1 recovers r.
# (metric="cosine" would give cosine similarity, which skips the per-row mean
# centring and is therefore not the Pearson coefficient.)
dist_out = 1 - pairwise_distances(A, metric="correlation")
print(dist_out)

参考:

https://www.jianshu.com/p/3eaa970bd45c

https://cloud.tencent.com/developer/ask/181044

发布了118 篇原创文章 · 获赞 132 · 访问量 60万+

猜你喜欢

转载自blog.csdn.net/qfikh/article/details/103356340