DBSCAN algorithm c++ implementation

 First, calculate the points within e from each point. If the number >= minPts, then find out its direct density reachable and indirectly reachable points, use visited to mark whether the point is already in the cluster, and loop until the last one point.

#include <fstream>
#include <vector>
#include <iostream>
#include <cmath>
#include <set>
#include <sstream>
using namespace std;

struct Point
{
    double x;
    double y;
};

double distance(const Point& a, const Point& b)
{
    return sqrt(pow(a.x - b.x,2) + pow(a.y - b.y,2));
}

vector <set<int>> getPointSet(const vector<Point> data, double e)
{
    vector<set<int>> PointSet(data.size());         //用到了PointSet[i],要先进行初始化
    //vector<set<int>> PointSet;
    for(int i=0;i<data.size();i++)
    {
        for (int j = 0; j < data.size(); j++)
        {
            if (distance(data[i], data[j]) <= e)
            {
                PointSet[i].insert(j);         //j还是j+1
            }
        }
    }
    return PointSet;
}

vector<set<int>> DAIANA(vector<set<int>> point,int MinPts)
{
    vector<bool> visted(point.size(),false);
    vector<set<int>> cluster;
    for (int i = 0; i < point.size(); i++)
    {
        if (!visted[i])
        {
            if (point[i].size() >= MinPts)              //{几个}  
            {
                for (set<int>::const_iterator j = point[i].begin(); j != point[i].end(); j++)
                {
                    visted[*j] = true;
                    if (point[*j].size() >= MinPts)         //间接相连
                    {
                        for (auto k = point[*j].begin(); k != point[*j].end(); k++)
                        {
                            visted[*k] = true;
                            point[i].insert(*k);
                        }
                        //point[i].insert(point[*j].begin(), point[*j].end());
                    }
                }
                cluster.push_back(point[i]);
            }
        }
       
    }
    return cluster;
}


vector<Point> ReadData(string filename)
{
    vector<Point> data;
    ifstream file(filename);
    if (file.is_open())
    {
        string line;
        while (getline(file, line))
        {
            istringstream iss(line);
            double x, y;
            string token;
            Point point;
            if (getline(iss, token, ',') && istringstream(token) >> point.x &&
                getline(iss, token, ',') && istringstream(token) >> point.y) {
                data.push_back(point);
            }
        }
    }
    else
    {
        cout << "open fail";
    }
    file.close();
    return data;
}

int main()
{ 
    vector<Point> dataset = ReadData("data.txt");
    double e;
    int MinPts;
    cin >> e >> MinPts;

    vector <set<int>> point;
    vector<set<int>> cluster;
    point=getPointSet(dataset, e);
    cluster=DAIANA(point, MinPts);
    for (int i = 0; i < cluster.size(); i++)
    {
        cout << "{";
        int count = 0;
        for (auto j = cluster[i].begin(); j != cluster[i].end(); j++)
        {
            
                cout << *j+1 ;
                if (++count < cluster[i].size())
                {
                    cout << ",";
                }
        }
        cout << "}" << endl;
    }
}

or 

Determine whether it is a core point. If so, put its directly density reachable points into set<int> neighbors; and then determine if there are core points among these directly density reachable points, then put the directly density reachable points into set< ;int> subNeighbors, last neighbors.insert(subNeighbors.begin(), subNeighbors.end());

std::vector<std::vector<Point>> dbscan(const std::vector<Point>& dataset, double epsilon, int minPts) {
    std::vector<std::vector<Point>> clusters;
    std::vector<bool> visited(dataset.size(), false);
    
    for (int i = 0; i < dataset.size(); ++i) {
        if (visited[i]) continue;
        
        visited[i] = true;
        std::vector<Point> cluster;
        
        std::set<int> neighbors;
        for (int j = 0; j < dataset.size(); ++j) {
            if (calculateDistance(dataset[i], dataset[j]) <= epsilon) {
                neighbors.insert(j);
            }
        }
        
        if (neighbors.size() >= minPts) {
            for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
                int index = *it;
                if (!visited[index]) {
                    visited[index] = true;
                    std::set<int> subNeighbors;
                    
                    for (int j = 0; j < dataset.size(); ++j) {
                        if (calculateDistance(dataset[index], dataset[j]) <= epsilon) {
                            subNeighbors.insert(j);
                        }
                    }
                    
                    if (subNeighbors.size() >= minPts) {
                        neighbors.insert(subNeighbors.begin(), subNeighbors.end());
                    }
                }
            }
        }
        
        for (auto it = neighbors.begin(); it != neighbors.end(); ++it) {
            cluster.push_back(dataset[*it]);
        }
        
        if (!cluster.empty()) {
            clusters.push_back(cluster);
        }
    }
    
    return clusters;
}

Data set: Reference Data Mining Principles and Algorithms 4th Edition DBSCAN example

1.0, 0.0
4.0, 0.0
0.0, 1.0
1.0, 1.0
2.0, 1.0
3.0,1.0    
4.0,1.0    
5.0,1.0    
0.0,2.0    
1.0,2.0    
4.0,2.0    
1.0,3.0

result

Guess you like

Origin blog.csdn.net/m0_70732442/article/details/134082366