KMeans(聚类算法)-按地图位置距离-计算常去位置

package com.*.prophet.algorithm.sugar;

import com.*.prophet.algorithm.core.KMeans;

/**
 * @Author:langjf
 * @Date: 2021/9/14
 * @Desc K——Menaus 判断 位置相距多少米为最优K
 */
public class LocationsKMeansHelper {

    private final static double PI = 3.14159265358979323; // 圆周率
    private final static double R = 6371229; // 地球的半径

    public static double getDistance(double lng1, double lat1, double lng2, double lat2) {
        double x, y, distance;
        x = (lng2 - lng1) * PI * R
                * Math.cos(((lat1 + lat2) / 2) * PI / 180) / 180;
        y = (lat2 - lat1) * PI * R / 180;
        distance = Math.hypot(x, y);
        return distance;
    }

    //以两个位置之间的距离寻找最优解
    public static void KMeans(float[][] datas, double range) {
        //默认簇节点为4
        com.gwm.prophet.algorithm.core.KMeans s = new KMeans(datas);
        s.cluster();
        while (!isClassify(s, range)) {
            s.setResent(s.getClassCount() + 1, s);
        }
        s.printConsole();
    }

    //判断是否最优K
    public static boolean isClassify(KMeans s, double range) {
        boolean isClassify = true;
        for (int i = 0; i < s.getInstanceNumber(); i++) {
            int j = Integer.valueOf(String.valueOf(s.getData()[i][2]).substring(0, 1));
            float[][] data = s.getData();
            double distance = getDistance(data[i][0], data[i][1], data[j][0], data[j][1]);
            if (range < distance) {
                isClassify = false;
                break;
            }
        }
        return isClassify;
    }


    public static void main(String[] args) {
        float[][] data = new float[10][3];
        data[0] = new float[]{(float) 99.75346, (float) 24.09003, 0};
        data[1] = new float[]{(float) 106.75346, (float) 25.09003, 0};
        data[2] = new float[]{(float) 107.75346, (float) 27.09003, 0};
        data[3] = new float[]{(float) 106.75346, (float) 27.09003, 0};
        data[4] = new float[]{(float) 105.75346, (float) 28.09003, 0};
        data[5] = new float[]{(float) 104.75346, (float) 25.09003, 0};
        data[6] = new float[]{(float) 104.75346, (float) 25.09003, 0};
        data[7] = new float[]{(float) 104.75346, (float) 25.09003, 0};
        data[8] = new float[]{(float) 104.75346, (float) 21.09003, 0};
        data[8] = new float[]{(float) 104.75346, (float) 21.09003, 0};
        KMeans(data, 500);
    }
}
package com.gwm.prophet.algorithm.core;

import lombok.Data;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;

/**
 * @Author:langjf
 * @Date: 2021/9/1
 * @Desc
 */

class PPoint {
    public float x;
    public float y;
    public int flag = -1;

    public PPoint() {

    }

    public PPoint(float x, float y) {
        this.x = x;
        this.y = y;
    }
}

@Data
public class KMeans {
    //聚类的数目
      int ClassCount =4;
    //样本数目(测试集)
      int InstanceNumber = 9;
    //样本属性数目(测试)
      int FieldCount = 2;

    //设置异常点阈值参数(每一类初始的最小数目为InstanceNumber/ClassCount^t)
    final static double t = 2.0;

    //存放数据的矩阵
    private float[][] data;
    //每个类的均值中心
    private float[][] classData;
    //噪声集合索引
    private ArrayList<Integer> noises;
    //存放每次变换结果的矩阵
    private ArrayList<ArrayList<Integer>> resultList;
    //数据归1最大值
    float[] max = new float[FieldCount];


    public KMeans(float[][] data) {
        this.data = data;
        this.classData = new float[ClassCount][FieldCount];
        this.InstanceNumber=data.length;
        noises = new ArrayList<Integer>();
        resultList = new ArrayList<ArrayList<Integer>>(ClassCount);
    }

    public KMeans() {
        //最后一位用来储存结果
        classData = new float[ClassCount][FieldCount];
        data = new float[InstanceNumber][FieldCount+1];
        noises = new ArrayList<Integer>();
        resultList = new ArrayList<ArrayList<Integer>>(ClassCount);

        data[0]= new float[]{(float) 99.75346, (float)24.09003,0};
        data[1]= new float[]{(float) 106.75346,(float)25.09003,0};
        data[2]= new float[]{(float) 107.75346,(float)27.09003,0};
        data[3]= new float[]{(float) 106.75346,(float)27.09003,0};
        data[4]= new float[]{(float) 105.75346,(float)28.09003,0};
        data[5]= new float[]{(float) 104.75346,(float)25.09003,0};
        data[6]= new float[]{(float) 104.75346,(float)25.09003,0};
        data[7]= new float[]{(float) 104.75346,(float)25.09003,0};
        data[8]= new float[]{(float) 104.75346,(float)21.09003,0};


    }

    public static void main(String[] args) {
        KMeans s=  new KMeans();
        s.cluster();
        s.printConsole();
    }

    public void  setResent(int classCount, KMeans s){
        s.ClassCount = classCount;
        s.data = data;
        s.classData = new float[ClassCount][FieldCount];
        s.InstanceNumber=data.length;
        s.noises = new ArrayList<Integer>();
        s.resultList = new ArrayList<ArrayList<Integer>>(ClassCount);
        s.cluster();
    }


    public void cluster() {
        //数据归一化
        normalize();

        //标记是否需要重新找初始点
        boolean needFindInitials = true;

        //找初始点的迭代次数
        int times = 1;

        //找初始点
        while (needFindInitials) {
            needFindInitials = false;
            resultList.clear();
            System.out.println("寻找第" + (times++) + "次");

            //一次找初始点的尝试和根据初始点的分类
            findInitials();
            firstClassify();

            for (int i = 0; i < resultList.size(); i++) {
                if (resultList.get(i).size() < InstanceNumber / Math.pow(ClassCount, t)) {
                    needFindInitials = true;
                    noises.addAll(resultList.get(i));
                }
            }
        }

        Adjust();
    }


    private void normalize() {
        // 计算数据每个维度最大值max
        for (int i = 0; i < InstanceNumber; i++) {
            for (int j = 0; j < FieldCount; j++) {
                if (data[i][j] > max[j]) {
                    max[j] = data[i][j];
                }
            }
        }

        // 每个维度归一化值=原始值/max
        for (int i = 0; i < InstanceNumber; i++) {
            for (int j = 0; j < FieldCount; j++) {
                data[i][j] = data[i][j] / max[j];
            }
        }
    }

    /**
     * 寻找初始聚类中心
     */
    private void findInitials() {
        int i, j, a, b;
        i = j = a = b = 0;
        float maxDis = 0;
        int alreadyCls = 2;

        // 选取距离最远的两个点a,b作为聚类中心点
        ArrayList<Integer> initials = new ArrayList<Integer>();
        for (; i < InstanceNumber; i++) {
            // 噪声点不参与计算
            if (noises.contains(i)) {
                continue;
            }
            j = i + 1;
            for (; j < InstanceNumber; j++) {
                // 噪声点不参与计算
                if (noises.contains(j)) {
                    continue;
                }
                float newDis = calDis(data[i], data[j]);
                if (maxDis < newDis) {
                    a = i;
                    b = j;
                    maxDis = newDis;
                }
            }
        }

        // initials添加初始聚类中心点序号a,b
        initials.add(a);
        initials.add(b);

        // classData添加聚类中心点data[a],data[b]
        classData[0] = data[a];
        classData[1] = data[b];

        // 新增两个聚类,并添加聚类成员
        ArrayList<Integer> resultOne = new ArrayList<Integer>();
        ArrayList<Integer> resultTwo = new ArrayList<Integer>();
        resultOne.add(a);
        resultTwo.add(b);
        resultList.add(resultOne);
        resultList.add(resultTwo);

        // 1、计算剩下每个点x与其他点的最小距离l,并记录Map<x,l>
        // 2、选取Map<x,l>中的最大l,并以对应的点x作为新的聚类中心
        while (alreadyCls < ClassCount) {
            i = j = 0;
            float maxMin = 0;
            int newClass = -1;

            for (; i < InstanceNumber; i++) {
                float min = 0;
                float newMin = 0;
                if (initials.contains(i)) {
                    continue;
                }
                if (noises.contains(i)) {
                    continue;
                }
                for (j = 0; j < alreadyCls; j++) {
                    newMin = calDis(data[i], classData[j]);
                    if (min == 0 || newMin < min) {
                        min = newMin;
                    }
                }
                if (min > maxMin) {
                    maxMin = min;
                    newClass = i;
                }
            }

            // initials添加新的聚类中心点序号newClass
            initials.add(newClass);

            // classData添加新的聚类中心点data[newClass]
            classData[alreadyCls++] = data[newClass];

            // 新增一个聚类,并添加成员
            ArrayList<Integer> rslt = new ArrayList<Integer>();
            rslt.add(newClass);
            resultList.add(rslt);
        }
    }

    /**
     * 首次聚类分配
     * 点x到哪个聚类中心点最近,则划分到哪个聚类
     */
    public void firstClassify() {
        for (int i = 0; i < InstanceNumber; i++) {
            float min = 0f;
            int clsId = -1;
            for (int j = 0; j < classData.length; j++) {
                // 欧式距离
                float newMin = calDis(classData[j], data[i]);
                if (clsId == -1 || newMin < min) {
                    clsId = j;
                    min = newMin;
                }
            }

            if (!resultList.get(clsId).contains(i)) {
                resultList.get(clsId).add(i);
            }
        }
    }

    // 迭代分类,直到各个类的数据不再变化
    public void Adjust() {
        // 记录是否发生变化
        boolean change = true;

        // 循环的次数
        int times = 1;
        while (change) {
            // 复位
            change = false;
            System.out.println("迭代" + (times++) + "次");

            // 重新计算每个类的均值
            for (int i = 0; i < ClassCount; i++) {
                // 原有的数据
                ArrayList<Integer> cls = resultList.get(i);

                // 新的均值
                float[] newMean = new float[FieldCount];

                // 计算均值
                for (Integer index : cls) {
                    for (int j = 0; j < FieldCount; j++)
                        newMean[j] += data[index][j];
                }
                for (int j = 0; j < FieldCount; j++) {
                    newMean[j] /= cls.size();
                }
                if (!compareMean(newMean, classData[i])) {
                    classData[i] = newMean;
                    change = true;
                }
            }
            // 清空之前的数据
            for (ArrayList<Integer> cls : resultList) {
                cls.clear();
            }

            // 重新分配
            for (int i = 0; i < InstanceNumber; i++) {
                float min = 0f;
                int clsId = -1;
                for (int j = 0; j < classData.length; j++) {
                    float newMin = calDis(classData[j], data[i]);
                    if (clsId == -1 || newMin < min) {
                        clsId = j;
                        min = newMin;
                    }
                }
                data[i][FieldCount] = clsId;
                resultList.get(clsId).add(i);
            }
        }
    }

    /**
     * 计算a样本和b样本的欧式距离作为不相似度
     *
     * @param aVector 样本a
     * @param bVector 样本b
     * @return 欧式距离长度
     */
    private float calDis(float[] aVector, float[] bVector) {
        double dis = 0;
        int i = 0;
        /* 最后一个数据在训练集中为结果,所以不考虑 */
        for (; i < aVector.length; i++)
            dis += Math.pow(bVector[i] - aVector[i], 2);
        dis = Math.pow(dis, 0.5);
        return (float) dis;
    }

    /**
     * 判断两个均值向量是否相等
     *
     * @param a 向量a
     * @param b 向量b
     * @return
     */
    private boolean compareMean(float[] a, float[] b) {
        if (a.length != b.length)
            return false;
        for (int i = 0; i < a.length; i++) {
            if (a[i] > 0 && b[i] > 0 && a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }


    /**
     * 打印结果
     */
    public void printConsole(){
        for (int i = 0; i < InstanceNumber; i++) {
            System.out.println(String.valueOf(data[i][FieldCount]).substring(0, 1));
        }

        // 统计每类的数目,打印到控制台
        for (int i = 0; i < ClassCount; i++) {
            System.out.println("第" + (i + 1) + "类数目: "
                    + resultList.get(i).size());
        }
    }

}

猜你喜欢

转载自blog.csdn.net/qq_34936628/article/details/120293334