Pattern recognition: functions for calculating pattern similarity, i.e. similarity measures between samples (various distance functions)

This note collects functions for calculating similarity measures between samples, that is, functions for calculating various distances.

import numpy as np
from numpy import *
from scipy.spatial.distance import pdist

# Two random sample vectors of length 10 (values in [0, 1)); the distance
# computations that follow operate on this pair.
x, y = np.random.random(10), np.random.random(10)

'''
x: [0.05627679 0.80556938 0.48002662 0.24378563 0.75763754 0.15353348
 0.54491664 0.1775408  0.50011986 0.55041845]
y: [0.50068882 0.12200178 0.79041352 0.07332715 0.017892   0.57880032
 0.56707591 0.48390753 0.631051   0.20035466]
'''

# Manhattan (L1) distance: the sum of the absolute component-wise differences.
d1 = np.abs(x - y).sum()
print('曼哈顿距离d1:', d1)

# Euclidean (L2) distance: square root of the summed squared differences.
# Both input vectors are echoed before the result is printed.
print('x', x)
print('y', y)
dist1 = np.sqrt(np.square(x - y).sum())
print('dist1:', dist1)

# Chebyshev (L-infinity) distance: the largest absolute component-wise difference.
d1 = np.abs(x - y).max()
print('d1:', d1)

# Standardized Euclidean distance: each squared difference is divided by the
# sample variance (ddof=1) of that component, taken over the stacked rows.
# NOTE(review): only x and y are stacked here, so every component variance is
# (x_i - y_i)**2 / 2 and d1 always comes out as sqrt(2 * len(x)); a larger
# sample matrix is needed for the variances to carry real information.
X = np.array([x, y])
sk = X.var(axis=0, ddof=1)
d1 = np.sqrt(np.sum(np.square(x - y) / sk))
print('d1:', d1)

# Mahalanobis distance.
# x and y are stacked as the two rows of X; every column of X (row of XT) is
# then one 2-component sample, giving 10 samples and 45 unordered pairs.
X = np.vstack([x, y])
XT = X.T
S = np.cov(X)          # covariance matrix between the two rows of X
SI = np.linalg.inv(S)  # inverse of the covariance matrix
d1 = []
for i, first in enumerate(XT):
    # Pair each sample only with the samples after it, so every unordered
    # pair is visited exactly once.
    for second in XT[i + 1:]:
        delta = first - second
        d1.append(np.sqrt(delta.dot(SI).dot(delta)))
print('d1:', d1)

# Cosine of the angle between x and y, stored as a distance (1 - cosine).
# The result is kept in dist1; this section does not print it.
norm_product = np.linalg.norm(x) * np.linalg.norm(y)
dist1 = 1 - x.dot(y) / norm_product

# Hamming distance
# Part 1: count the positions at which two fixed binary vectors differ.
# A plain ndarray replaces `mat(...)`: the `mat` alias was removed from the
# NumPy namespace in NumPy 2.0 (so `from numpy import *` no longer provides
# it), and np.matrix is discouraged for new code.
matV = np.array([[1, 1, 0, 1, 0, 1, 0, 0, 1], [0, 1, 1, 0, 0, 0, 1, 1, 1]])
smstr = np.nonzero(matV[0] - matV[1])  # indices where the two rows differ
print(len(smstr[0]))

# Part 2: normalized Hamming distance (fraction of mismatching positions)
# between two random 0/1 vectors.
x = (np.random.random(10) > 0.5).astype(np.int32)
y = (np.random.random(10) > 0.5).astype(np.int32)
d1 = np.mean(x != y)
print('d1:', d1)

# Jaccard similarity coefficient, reported here as the Jaccard distance
# (dissimilarity) between binary vectors.
# Part 1: SciPy's pdist on two fixed binary vectors.
# A plain ndarray replaces `mat(...)`: the `mat` alias was removed from the
# NumPy namespace in NumPy 2.0, so the star import no longer provides it.
matV = np.array([[1, 1, 0, 1, 0, 1, 0, 0, 1], [0, 1, 1, 0, 0, 0, 1, 1, 1]])
print("dist.jaccard:", pdist(matV, 'jaccard'))

# Part 2: the same quantity computed by hand for two random 0/1 vectors:
# mismatching positions divided by positions where at least one is non-zero.
x = (np.random.random(10) > 0.5).astype(np.int32)
y = (np.random.random(10) > 0.5).astype(np.int32)
either_set = (x != 0) | (y != 0)
up = np.double(((x != y) & either_set).sum())
down = np.double(either_set.sum())
# If both vectors are all zeros the ratio would be 0/0; two identical
# (empty) vectors are given distance 0 instead of nan.
d1 = up / down if down else np.double(0.0)
print('d1:', d1)

# Bray-Curtis distance on two fixed count vectors:
# sum(|x_i - y_i|) divided by the sum of all entries of x and y.
x = np.array([11, 0, 7, 8, 0])
y = np.array([24, 37, 5, 18, 1])
up = np.abs(x - y).sum()
down = (x + y).sum()
d1 = up / down
print('d1:', d1)

Guess you like

Source: blog.csdn.net/CTaaaaa/article/details/127948515