k-means 算法的 C 语言实现(转载)

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
 
/// Problem size: N is the number of sample points, K the number of clusters.
enum
{
    N = 12,
    K = 3
};
 
/// A point in the plane; also used to hold the center of a cluster.
typedef struct
{
    float x, y;   /// coordinates
} Point;
 
/// Cluster assignment: center[i] is the index (0..K-1) of the cluster that
/// point[i] currently belongs to.
int center[N];

/// The data set to be clustered.
Point point[N] = {
    { .x = 59.0f, .y = 82.0f },
    { .x = 66.0f, .y = 73.0f },
    { .x = 53.0f, .y = 68.0f },
    { .x = 66.0f, .y = 58.0f },
    { .x = 28.0f, .y = 64.0f },
    { .x = 23.0f, .y = 53.0f },
    { .x = 15.0f, .y = 66.0f },
    { .x = 41.0f, .y = 58.0f },
    { .x = 64.0f, .y = 35.0f },
    { .x = 52.0f, .y = 34.0f },
    { .x = 73.0f, .y = 26.0f },
    { .x = 55.0f, .y = 16.0f }
};

/// Current center point of each of the K clusters.
Point mean[K];
 
/// Euclidean distance between two points.
float getDistance(Point point1, Point point2)
{
    const float dx = point1.x - point2.x;
    const float dy = point1.y - point2.y;

    return sqrt(dx * dx + dy * dy);
}
 
/// Recompute the center point of every cluster and print the new centers.
///
/// center[j] is the cluster index of point[j]. For each cluster the
/// coordinates of its member points are averaged and the result is stored in
/// mean[].
///
/// A cluster that currently has no members keeps its previous center.
/// Dividing the (zero) coordinate sums by a zero member count would otherwise
/// produce NaN on IEEE-754 systems, and a NaN center never compares as the
/// nearest one, so the cluster could not receive points again.
void getMean(int center[N])
{
    int i, j;

    for (i = 0; i < K; ++i)
    {
        Point sum = {0.0f, 0.0f};   /// running coordinate totals for cluster i
        int count = 0;              /// number of points found in cluster i

        for (j = 0; j < N; ++j)
        {
            if (center[j] == i)
            {
                ++count;
                sum.x += point[j].x;
                sum.y += point[j].y;
            }
        }

        if (count > 0)
        {
            mean[i].x = sum.x / count;
            mean[i].y = sum.y / count;
        }
    }

    for (i = 0; i < K; ++i)
    {
        printf("The new center point of %d is : \t( %f, %f )\n", i+1, mean[i].x, mean[i].y);
    }
}
 
/// Squared-error function: the sum, over all points, of the squared distance
/// between the point and the center of the cluster it is assigned to.
///
/// When two successive passes give the same value, the centers are taken to
/// have settled.
float getE()
{
    float total = 0.0;
    int c, p;

    for (c = 0; c < K; ++c)
    {
        for (p = 0; p < N; ++p)
        {
            float dx, dy, sq;

            if (center[p] != c)
            {
                continue;   /// point p is assigned to a different cluster
            }

            dx = point[p].x - mean[c].x;
            dy = point[p].y - mean[c].y;
            sq = dx * dx + dy * dy;
            total += sq;
        }
    }

    return total;
}
 
/// Assign each of the N points to its nearest cluster center.
///
/// For every point the distance to each of the K centers in mean[] is
/// computed with getDistance(). The index of the closest center is stored in
/// center[] (on a tie the lower index wins) and the assignment is printed.
///
/// The running minimum starts at INFINITY, so the search does not depend on
/// the coordinate range of the data. A distance that is NaN never compares
/// as smaller, so a center with NaN coordinates is simply skipped.
void cluster()
{
    int i, q;

    for (i = 0; i < N; ++i)
    {
        float min = INFINITY;   /// smallest distance seen so far for point i

        for (q = 0; q < K; ++q)
        {
            const float d = getDistance(point[i], mean[q]);

            if (d < min)
            {
                min = d;
                center[i] = q;
            }
        }
        printf("( %.0f, %.0f )\t in cluster-%d\n", point[i].x, point[i].y, center[i] + 1);
    }
    printf("-----------------------------\n");
}
 
int main()
{
    int i, j, n = 0;
    float temp1;
    float temp2, t;
    printf("----------Data sets----------\n");
    for(i = 0; i < N; ++i)
    {
    printf("\t( %.0f, %.0f )\n", point[i].x, point[i].y);
    }
    printf("-----------------------------\n");
 
/*
    可以选择当前时间为随机数
    srand((unsigned int)time(NULL));
    for(i = 0; i < K; ++i)
    {
    j = rand() % K;
    mean[i].x = point[j].x;
    mean[i].y = point[j].y;
    }
*/
    mean[0].x = point[0].x;      /// 初始化k个中心点
    mean[0].y = point[0].y;
 
    mean[1].x = point[3].x;
    mean[1].y = point[3].y;
 
    mean[2].x = point[6].x;
    mean[2].y = point[6].y;
 
    cluster();          /// 第一次根据预设的k个点进行聚类
    temp1 = getE();        ///  第一次平方误差
    n++;                   ///  n计算形成最终的簇用了多少次
 
    printf("The E1 is: %f\n\n", temp1);
 
    getMean(center);
    cluster();
    temp2 = getE();        ///  根据簇形成新的中心点,并计算出平方误差
    n++;
 
    printf("The E2 is: %f\n\n", temp2);
 
    while(fabs(temp2 - temp1) != 0)   ///  比较两次平方误差 判断是否相等,不相等继续迭代
    {
    temp1 = temp2;
        getMean(center);
    cluster();
    temp2 = getE();
    n++;
    printf("The E%d is: %f\n", n, temp2);
    }
 
    printf("The total number of cluster is: %d\n\n", n);  /// 统计出迭代次数
    system("pause");
    return 0;
}

猜你喜欢

转载自blog.csdn.net/weixin_39804483/article/details/80853840