[Búsqueda web] Notas de estudio: algoritmo de agrupamiento jerárquico aglomerativo (HAC)

El algoritmo de agrupamiento jerárquico aglomerativo (Hierarchical Agglomerative Clustering, HAC)
es el siguiente:
Algoritmo HAC
[Imagen: inserte aquí la descripción]

Enlaces relacionados:
Algoritmo de agrupamiento jerárquico: principios e implementación

Notas de estudio:
En primer lugar, se reproduce aquí el código del enlace:

// HAC_learning.cpp: 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <vector>
using namespace std;
// Number of input points. Every point starts as its own cluster, and this
// value also sizes the fixed arrays inside ManagerC and bounds its loops.
const int iniClusNum = 12;
// Target cluster count: ManagerC::ClusterAlgo keeps merging while the number
// of remaining clusters is greater than this.
const int stopNum = 3;

/// A 2-D point that is also used as the representative (centroid) of a cluster.
///
/// `NumPBelong` is the index of the cluster the point belongs to, or -1 while
/// the point has not been assigned to any cluster.
///
/// Copy construction and copy assignment are deliberately left to the compiler
/// (Rule of Zero): the class owns no resources, and the implicit assignment
/// returns a non-const `Point&`, as the standard containers and idiomatic
/// chained assignment expect.
class Point
{
public:
	double x = 0;          ///< Horizontal coordinate.
	double y = 0;          ///< Vertical coordinate.
	int NumPBelong = -1;   ///< Owning cluster index; -1 means "unassigned".

	/// Creates an unassigned point at the origin.
	Point() = default;

	/// Creates a point at (x1, y1), optionally tagged with cluster index f.
	Point(double x1, double y1, int f = -1) :x(x1), y(y1), NumPBelong(f) {}
};

/// Stateless geometry helpers operating on Point.
///
/// Both members are const so they can be called on a const ManagerP as well.
class ManagerP
{
public:
	/// Returns the Euclidean distance between p1 and p2.
	double getDistance(const Point& p1, const Point& p2) const
	{
		// Plain multiplication instead of pow(v, 2): same value, no call into
		// the general-purpose power routine.
		const double dx = p1.x - p2.x;
		const double dy = p1.y - p2.y;
		return std::sqrt(dx * dx + dy * dy);
	}

	/// Returns the midpoint of p1 and p2.
	///
	/// Only the coordinates are averaged; the cluster tag of the result is the
	/// one a default-constructed Point carries.
	Point getMean(const Point& p1, const Point& p2) const
	{
		Point p;
		p.x = (p1.x + p2.x) / 2;
		p.y = (p1.y + p2.y) / 2;
		return p;
	}
};

class ManagerC
{
public:
	Point Cluster[iniClusNum];
	vector<int> ClusterLast[iniClusNum];
	bool isIndexClose[iniClusNum];
	bool isIndexClose2[iniClusNum];
	void initCluster()//use txt to init, import txt
	{
		ifstream  myfile("point.txt");
		if (!myfile)
		{
			cout << "cannot open file.";   return;
		}

		Point p;
		int x, y;
		int i = 0;
		while (!myfile.eof())
		{
			myfile >> x >> y;
			p.x = x;
			p.y = y;
			Cluster[i] = p;
			i++;
		}
		myfile.close();
	}
	void initIndexClose()
	{
		for (int i = 0; i<iniClusNum; i++)
		{
			isIndexClose[i] = false;
			isIndexClose2[i] = false;
		}
	}
	void print()
	{
		for (int i = 0; i<iniClusNum; i++)
		{
			if (ClusterLast[i].empty())
			{
				continue;
			}
			cout << "cluster " << i + 1 << endl;
			vector<int>::iterator ite = ClusterLast[i].begin();
			for (; ite != ClusterLast[i].end(); ite++)
			{
				cout << *ite << "\t";
			}
			cout << endl;

		}
		cout << endl;
	}
	void ClusterAlgo()//use minheap to realize, to optimize
	{

		int ClustNum = iniClusNum;
		int clus_index = 0;
		while (ClustNum>stopNum)
		{

			double min = INT_MAX;
			int x = -1, y = -1;
			ManagerP mp;
			for (int i = 0; i<iniClusNum; i++)
			{
				if (isIndexClose[i])
				{
					continue;
				}
				for (int j = i + 1; j<iniClusNum; j++)
				{
					if (isIndexClose[j])
					{
						continue;
					}

					double new_d = mp.getDistance(Cluster[i], Cluster[j]);
					if (new_d < min)
					{
						min = new_d;
						x = i; y = j;

					}
				}
			}
			if (x == -1 || y == -1)
			{
				break;
			}

			Point p = mp.getMean(Cluster[x], Cluster[y]);
			//x<y	store the result
			if (Cluster[x].NumPBelong == -1 && Cluster[y].NumPBelong == -1)
			{
				cout << "a0" << endl;
				ClusterLast[clus_index].push_back(x);//xchange to p, y close
				ClusterLast[clus_index].push_back(y);
				p.NumPBelong = clus_index;
				isIndexClose[y] = true;//y is closed
				Cluster[x] = p;//new p is open
				isIndexClose[x] = false;
				isIndexClose2[x] = true;
				isIndexClose2[y] = true;
				clus_index++;

			}
			else if (Cluster[x].NumPBelong == -1 && Cluster[y].NumPBelong != -1)//already exists one cluster
			{
				cout << "a1" << endl;
				ClusterLast[Cluster[y].NumPBelong].push_back(x);
				isIndexClose[x] = true;//x is closed
				p.NumPBelong = Cluster[y].NumPBelong;
				Cluster[y] = p;//new p is open
				isIndexClose2[x] = true;
			}
			else if (Cluster[x].NumPBelong != -1 && Cluster[y].NumPBelong == -1)
			{
				cout << "a2" << endl;
				ClusterLast[Cluster[x].NumPBelong].push_back(y);
				isIndexClose[y] = true;//y is closed
				p.NumPBelong = Cluster[x].NumPBelong;
				Cluster[x] = p;//new p is open
				isIndexClose2[y] = true;
			}
			else if (Cluster[x].NumPBelong != -1 && Cluster[y].NumPBelong != -1)//both are clusteroid
			{
				cout << "a3" << endl;
				vector<int>::iterator ite = ClusterLast[Cluster[y].NumPBelong].begin();//put y's node in x
				for (; ite != ClusterLast[Cluster[y].NumPBelong].end(); ite++)
				{
					ClusterLast[Cluster[x].NumPBelong].push_back(*ite);
				}
				ClusterLast[Cluster[y].NumPBelong].clear();
				isIndexClose[y] = true;//y is closed
				p.NumPBelong = Cluster[x].NumPBelong;
				Cluster[x] = p;//new p is open

			}
			ClustNum--;
		}
		int total_size = 0;
		for (int i = 0; i<stopNum; i++)
		{
			total_size += ClusterLast[i].size();
		}
		if (total_size<iniClusNum)
		{
			int j = 0;
			for (int i = 0; i<iniClusNum; i++)
			{
				if (isIndexClose2[i] == false)
				{
					ClusterLast[stopNum - 1 - j].push_back(i);
					j++;
				}
			}

		}
	}

};

int main()
{
	ManagerC M;
	M.initCluster();
	M.initIndexClose();
	M.ClusterAlgo();
	M.print();

	system("pause");
}

Donde el archivo de entrada contiene lo siguiente,
point.txt:

4 10
4 8
7 10
6 8
3 4
2 2
5 2
9 3
10 5
11 4
12 3
12 6

El resultado:
[Imagen: inserte aquí la descripción]
Un esquema:

Análisis del código:
Se define la clase Point, que contiene x, y (coordenadas) y NumPBelong (índice del clúster al que pertenece el punto)
M.initCluster(); lee los datos de entrada
M.initIndexClose(); abre todos los puntos (pone los indicadores a false)
M.ClusterAlgo(); ejecuta el agrupamiento HAC
M.print(); imprime el resultado del agrupamiento

Explicación detallada de M.ClusterAlgo():

设定预期分类数stopNum，令实际分类数初始值为输入数据数（每个数据自成一类），并令clus_index=0（只在循环开始前初始化一次）
当（实际分类数>预期分类数stopNum）时
{
	
	在x,y的isIndexClose值都不为true（关）的条件下（即x,y都还没有被并入其他点：尚未合并的原始点，或某类当前的中心点）
		找出距离最小的两个点x,y并计算他们的距离min
	如果找不到这样的一对点（未关闭的点不足两个）
		跳出循环
		
	//计算x,y的中点p
	a0:如果x，y都不是某类的中心点
		将x,y分入第clus_index类
		关闭y（isIndexClose=true）（y不再是某类中心点）
		把x,y的中点设为x，并令这个新的x为此类的中心点
		打开x（isIndexClose=false）（x为此类中心点）
		标记x,y（isIndexClose2= true;）
	a1：如果x不是某类中心点，y是某类中心点
		将x放入y所在的类别中
		关闭x
		把x,y的中点设为y
		标记x
	a2：如果x是某类中心点，y不是某类中心点
		将y放入x所在的类别中
		关闭y
		把x,y的中点设为x
		标记y
	a3：如果x，y都是中心点
		把y所在类别的所有点移入x所在类别中
		关闭y
		把x,y的中点设为x
实际分类数-1
}
计算所有已分类类别中数据的总数total_size
如果已分类数据总数total_size<原始输入数据总数iniClusNum（有数据未分类）
{
	对每一个点进行检查，将所有未标记的点（isIndexClose2 == false，即从未参与过合并的点）逐个放入结果类别ClusterLast中
}

weixin_42176221

43 artículos originales publicados · 4 «me gusta» recibidos · 1203 visitas

Mensaje privado · Seguir

[Búsqueda web] Notas de estudio: algoritmo de agrupamiento jerárquico aglomerativo (HAC)

Supongo que te gusta