使用numpy和sklearn中封装好的工具计算相似度
当数据为二维矩阵时,以下代码计算的是行与行之间的相似度。若D为相似度矩阵,则Dij表示第i行与第j行数据之间的相似度;对角线上的元素Dii为第i行与其自身的相似度,其值为1
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def calcu_similarity(features, method='pearson'):
    """Compute the pairwise similarity between the rows of ``features``.

    For a 2-D input, entry ``[i, j]`` of the result is the similarity
    between row ``i`` and row ``j``, so the diagonal holds each row's
    similarity with itself.

    Args:
        features: Array-like data; each row is treated as one sample.
        method: Similarity measure to use. ``'pearson'`` uses
            ``np.corrcoef`` (Pearson correlation coefficient) and
            ``'cosine'`` uses scikit-learn's ``cosine_similarity``.

    Returns:
        The similarity matrix produced by the selected backend.

    Raises:
        ValueError: If ``method`` is not ``'pearson'`` or ``'cosine'``.
    """
    if method == 'pearson':
        return np.corrcoef(features)  # Pearson correlation between rows
    if method == 'cosine':
        return cosine_similarity(features)  # cosine similarity between rows
    # Fail loudly instead of hitting an unbound local on an unknown method.
    raise ValueError(
        f"unsupported method {method!r}; expected 'pearson' or 'cosine'"
    )
# Demo: print the row-wise similarity matrix of a small 3x3 example under
# each supported metric, Pearson first and then cosine.
data = np.array([[1, 4, 3], [0, 5, 6], [1, 2, 3]])

# Expected output for 'pearson':
# [[1.         0.8824975  0.65465367]
#  [0.8824975  1.         0.93325653]
#  [0.65465367 0.93325653 1.        ]]
#
# Expected output for 'cosine':
# [[1.         0.95418373 0.94345635]
#  [0.95418373 1.         0.95814028]
#  [0.94345635 0.95814028 1.        ]]
for metric in ('pearson', 'cosine'):
    similarity = calcu_similarity(data, method=metric)
    print(similarity)