C++ 【欧式距离、余弦相似度】相似度计算理解(附源码)

1、区别

        

        欧式距离衡量空间中两点之间的直线距离,余弦相似度衡量两个向量在空间中的方向差异。欧式距离越小越相似,余弦值越大越相似

        理解:欧式距离衡量的是数值上的绝对差异,余弦相似度衡量的是各维度间相对层面(方向)的差异

2、计算公式

        欧式距离:

        dist(A, B) = \parallel A - B\parallel _{2} = \sqrt{\sum_{i=1}^{n}(x_{i} - y_{i})^{2}}

        余弦相似度:

        cos(A, B) = \frac{A\cdot B}{\parallel A\parallel _{2}\parallel B\parallel _{2}} = \frac{\sum_{i=1}^{n}(x_{i}\times y_{i})}{\sqrt{\sum_{i=1}^{n}(x_{i})^{2}}\sqrt{\sum_{i=1}^{n}(y_{i})^{2}}}         

3、取值范围

        欧式距离取值范围:[0, +∞)

        余弦相似度取值范围:[-1, 1]

4、源码

       欧式距离

       【Mat类型】

/// @brief Euclidean (L2) distance between row 0 of two matrices.
///
/// Computes sqrt(sum_i (x_i - y_i)^2) over the first `baseImg.cols` columns,
/// reading each element with `at<float>(0, i)`.
///
/// The per-element difference is signed. Taking the absolute value of each
/// operand before subtracting would measure the distance between |x| and |y|,
/// which reports 0 for opposite values such as -1 and 1.
///
/// @param baseImg   Reference descriptor; row 0 is read as float.
/// @param targetImg Descriptor to compare; row 0 is read as float.
///                  NOTE(review): assumes both matrices hold float data and
///                  that targetImg has at least baseImg.cols columns -- this
///                  is not checked here; confirm against callers.
/// @return The distance, >= 0; 0 when baseImg has no columns.
double euclidean_distance(Mat baseImg, Mat targetImg)
{
	double sumDescriptor = 0;
	for (int i = 0; i < baseImg.cols; i++)
	{
		const double numBase = baseImg.at<float>(0, i);
		const double numTarget = targetImg.at<float>(0, i);
		// Signed difference, squared by multiplication instead of pow(x, 2).
		const double diff = numBase - numTarget;
		sumDescriptor += diff * diff;
	}
	return sqrt(sumDescriptor);
}

        【vector类型】

/// @brief Euclidean (L2) distance between two equally sized vectors.
///
/// Computes sqrt(sum_i (base[i] - target[i])^2).
///
/// @param base   First vector.
/// @param target Second vector; must have the same number of elements as
///               `base`. This is checked with `assert`, so a mismatch aborts
///               in debug builds instead of reading past the end of `target`.
/// @return The distance, >= 0; 0 for two empty vectors.
///
/// Parameters are taken by const reference, so const vectors and temporaries
/// are accepted as well as the non-const lvalues accepted before.
double euclidean_distance(const std::vector<double>& base, const std::vector<double>& target)
{
	assert(base.size() == target.size());

	double sumDescriptor = 0.0;
	// std::size_t index avoids the signed/unsigned comparison with size().
	for (std::size_t i = 0; i < base.size(); ++i)
	{
		const double diff = base[i] - target[i];
		sumDescriptor += diff * diff;
	}
	// std::sqrt is correctly rounded, unlike the general-purpose pow(x, 0.5).
	return std::sqrt(sumDescriptor);
}

        余弦相似度

        【Mat类型】

/// @brief Cosine similarity between row 0 of two matrices.
///
/// Computes (A . B) / (||A|| * ||B||) over the first `baseImg.cols` columns,
/// reading each element with `at<float>(0, i)`. Despite the function name,
/// the value returned is a similarity (larger means more alike), not a
/// distance.
///
/// Elements are used with their sign. Taking the absolute value of each
/// element first would force the inner product to be non-negative, so vectors
/// pointing in opposite directions could never score below 0.
///
/// @param baseImg   Reference descriptor; row 0 is read as float.
/// @param targetImg Descriptor to compare; row 0 is read as float.
///                  NOTE(review): assumes both matrices hold float data and
///                  that targetImg has at least baseImg.cols columns -- this
///                  is not checked here; confirm against callers.
/// @return Similarity in [-1, 1] up to rounding; 0.0 when either row has zero
///         norm (including the case of no columns), rather than the NaN that
///         0/0 would produce.
double cos_distance(Mat baseImg, Mat targetImg)
{
	double squSumB = 0;
	double squSumT = 0;
	double innerPro = 0;
	for (int i = 0; i < baseImg.cols; i++)
	{
		const double numBase = baseImg.at<float>(0, i);
		const double numTarget = targetImg.at<float>(0, i);
		squSumB += numBase * numBase;
		squSumT += numTarget * numTarget;
		innerPro += numBase * numTarget;
	}

	const double modB = sqrt(squSumB);
	const double modT = sqrt(squSumT);
	const double denominator = modB * modT;
	// The direction of a zero vector is undefined; report "no similarity".
	if (denominator == 0.0)
		return 0.0;
	return innerPro / denominator;
}

        【vector类型】

/// @brief Euclidean norm (L2 length) of a vector.
///
/// @param vec Input components.
/// @return Square root of the sum of squared components; 0 for an empty
///         vector.
double getMold(const vector<double>& vec)
{
	double squaredTotal = 0.0;
	// Accumulate in element order, front to back.
	for (const double component : vec)
	{
		squaredTotal += component * component;
	}
	return sqrt(squaredTotal);
}
/// @brief Cosine similarity between two equally sized vectors.
///
/// Computes (base . target) / (||base|| * ||target||). Despite the function
/// name, the value returned is a similarity (larger means more alike), not a
/// distance.
///
/// The inner product and both squared norms are accumulated in a single pass
/// over the data.
///
/// @param base   First vector.
/// @param target Second vector; must have the same number of elements as
///               `base` (checked with `assert`).
/// @return Similarity in [-1, 1] up to rounding; 0.0 when either vector has
///         zero norm (including two empty vectors), rather than the NaN that
///         0/0 would produce.
double cos_distance(const std::vector<double>& base, const std::vector<double>& target)
{
	// Compare the sizes as unsigned values; narrowing size() to int first
	// would mix signed and unsigned operands.
	assert(base.size() == target.size());

	double innerProduct = 0.0;
	double squaredNormBase = 0.0;
	double squaredNormTarget = 0.0;
	for (std::size_t i = 0; i < base.size(); ++i)
	{
		innerProduct += base[i] * target[i];
		squaredNormBase += base[i] * base[i];
		squaredNormTarget += target[i] * target[i];
	}

	const double denominator = std::sqrt(squaredNormBase) * std::sqrt(squaredNormTarget);
	// The direction of a zero vector is undefined; report "no similarity".
	if (denominator == 0.0)
		return 0.0;
	return innerProduct / denominator;
}

猜你喜欢

转载自blog.csdn.net/Gary_ghw/article/details/125652508
今日推荐