Separating the wine data set with the C-means clustering algorithm (C#)

Introduction to the algorithm:
This algorithm is used to separate a mixed pile of data into its different types. For example, given a pile of banknotes, we can collect data such as the color and size of each note and, by analyzing that data, tell the 100-yuan, 50-yuan, 20-yuan notes and so on apart. The main approach is to assume that the data set contains C classes (for example, 10). C data points are chosen arbitrarily from the pile as the initial cluster centers, the distance from every other data point to each center is calculated, and each point is assigned to the class whose center is nearest. New cluster centers are then computed from the resulting classes, the distances are recalculated, and the points are reassigned by minimum distance. This is repeated iteratively until the cluster centers no longer change.

The data set to be separated is the wine data set. We have a batch of wines that are known to come from three different places of origin. For each wine a total of 13 features, such as taste, color, and sugar content, have been measured. These features are collected as data, with each record representing one bottle, and the goal is to distinguish the origin of each wine.
The wine data is shown in the figure below: 178 rows in total, i.e. 178 bottles, where each row has 13 feature values (the first column is the correct class label).
Here Insert Picture Description
The complete program:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace WineIde
{
    /// <summary>
    /// Clusters the rows of the wine data file into three groups with the
    /// k-means (C-means) algorithm and prints the row indices of each group.
    /// </summary>
    internal class Program
    {
        /// <summary>Data file that is read when no path is passed on the command line.</summary>
        private const string DefaultDataPath = @"C:\Users\user\Desktop\wine.txt";

        /// <summary>Number of feature columns read from each row (columns 1..13; column 0 is not used as a feature).</summary>
        private const int FeatureCount = 13;

        /// <summary>Upper bound on refinement passes, so the loop always terminates.</summary>
        private const int MaxIterations = 100;

        /// <summary>Rows whose feature vectors serve as the three initial cluster centers.</summary>
        private static readonly int[] InitialCenterRows = { 0, 59, 130 };

        /// <summary>
        /// Reads the data file, clusters its rows and prints, for every cluster,
        /// the zero-based indices of the rows assigned to it, followed by the total.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the path of the data file. When it is
        /// missing, <see cref="DefaultDataPath"/> is used.
        /// </param>
        private static void Main(string[] args)
        {
            string path = (args != null && args.Length > 0) ? args[0] : DefaultDataPath;

            List<string[]> wineData = ReadCsv(path);
            double[,] numData = ParseFeatures(wineData, FeatureCount);
            List<int>[] clusters = Cluster(numData, InitialCenterRows, MaxIterations);

            string[] headers = { "第一类数据:", "第二类数据:", "第三类数据:" };
            int total = 0;
            for (int c = 0; c < clusters.Length; c++)
            {
                Console.WriteLine(headers[c]);
                foreach (int row in clusters[c])
                {
                    Console.Write("{0}  ", row);
                }

                // Terminate every index line, including the last one, so the
                // total below starts on its own line.
                Console.WriteLine();
                total += clusters[c].Count;
            }

            Console.WriteLine("总数是:{0}", total);
        }

        /// <summary>
        /// Converts columns 1..<paramref name="featureCount"/> of every text row
        /// into a numeric matrix (one matrix row per input row).
        /// </summary>
        /// <param name="rows">Rows as returned by <see cref="ReadCsv"/>.</param>
        /// <param name="featureCount">Number of feature columns to read after column 0.</param>
        /// <returns>A <c>rows.Count x featureCount</c> matrix of parsed values.</returns>
        /// <exception cref="FormatException">
        /// A row has fewer than <c>featureCount + 1</c> columns or a field is not a number.
        /// </exception>
        private static double[,] ParseFeatures(List<string[]> rows, int featureCount)
        {
            double[,] values = new double[rows.Count, featureCount];
            for (int i = 0; i < rows.Count; i++)
            {
                string[] fields = rows[i];
                if (fields.Length < featureCount + 1)
                {
                    throw new FormatException(string.Format(
                        "Row {0} has {1} columns; expected at least {2}.",
                        i, fields.Length, featureCount + 1));
                }

                for (int j = 0; j < featureCount; j++)
                {
                    // Invariant culture: the file uses '.' as the decimal separator,
                    // which must not depend on the machine's regional settings.
                    values[i, j] = double.Parse(
                        fields[j + 1],
                        System.Globalization.CultureInfo.InvariantCulture);
                }
            }

            return values;
        }

        /// <summary>
        /// Runs k-means on <paramref name="data"/>. The initial centers are the
        /// rows listed in <paramref name="initialCenterRows"/>; the algorithm then
        /// alternates between recomputing each center as the mean of its members
        /// and reassigning every row to its nearest center (squared Euclidean
        /// distance over all columns) until no row changes cluster or
        /// <paramref name="maxIterations"/> passes have been made.
        /// </summary>
        /// <param name="data">Matrix with one row per sample and one column per feature.</param>
        /// <param name="initialCenterRows">Row indices used as starting centers; its length is the number of clusters.</param>
        /// <param name="maxIterations">Maximum number of recompute/reassign passes.</param>
        /// <returns>
        /// One list per cluster holding the indices of the rows assigned to it,
        /// in ascending order. Every row appears in exactly one list.
        /// </returns>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="initialCenterRows"/> is empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException">A seed row index is outside <paramref name="data"/>.</exception>
        public static List<int>[] Cluster(double[,] data, int[] initialCenterRows, int maxIterations)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            if (initialCenterRows == null)
            {
                throw new ArgumentNullException("initialCenterRows");
            }

            if (initialCenterRows.Length == 0)
            {
                throw new ArgumentException("At least one initial center is required.", "initialCenterRows");
            }

            int rowCount = data.GetLength(0);
            int columnCount = data.GetLength(1);
            int clusterCount = initialCenterRows.Length;

            double[,] centers = new double[clusterCount, columnCount];
            for (int c = 0; c < clusterCount; c++)
            {
                int seed = initialCenterRows[c];
                if (seed < 0 || seed >= rowCount)
                {
                    throw new ArgumentOutOfRangeException(
                        "initialCenterRows",
                        string.Format("Seed row {0} does not exist; the data has {1} rows.", seed, rowCount));
                }

                for (int j = 0; j < columnCount; j++)
                {
                    centers[c, j] = data[seed, j];
                }
            }

            int[] labels = new int[rowCount];
            AssignToNearest(data, centers, labels);

            for (int pass = 0; pass < maxIterations; pass++)
            {
                UpdateCenters(data, labels, centers);
                if (!AssignToNearest(data, centers, labels))
                {
                    // No row moved, so the centers would not change either: converged.
                    break;
                }
            }

            List<int>[] clusters = new List<int>[clusterCount];
            for (int c = 0; c < clusterCount; c++)
            {
                clusters[c] = new List<int>();
            }

            for (int i = 0; i < rowCount; i++)
            {
                clusters[labels[i]].Add(i);
            }

            return clusters;
        }

        /// <summary>
        /// Writes into <paramref name="labels"/> the index of the center nearest
        /// to each row. Ties go to the lowest center index, so every row always
        /// receives a label.
        /// </summary>
        /// <returns><c>true</c> when at least one label differs from its previous value.</returns>
        private static bool AssignToNearest(double[,] data, double[,] centers, int[] labels)
        {
            int rowCount = data.GetLength(0);
            int columnCount = data.GetLength(1);
            int clusterCount = centers.GetLength(0);
            bool changed = false;

            for (int i = 0; i < rowCount; i++)
            {
                int nearest = 0;
                double nearestDistance = double.PositiveInfinity;

                for (int c = 0; c < clusterCount; c++)
                {
                    double distance = 0;
                    for (int j = 0; j < columnCount; j++)
                    {
                        double diff = data[i, j] - centers[c, j];
                        distance += diff * diff;
                    }

                    // Strict '<' keeps the first (lowest-index) center on a tie.
                    if (distance < nearestDistance)
                    {
                        nearestDistance = distance;
                        nearest = c;
                    }
                }

                if (labels[i] != nearest)
                {
                    labels[i] = nearest;
                    changed = true;
                }
            }

            return changed;
        }

        /// <summary>
        /// Replaces each center with the mean of the rows currently labelled
        /// with it. A center without members is left where it is, which avoids
        /// a division by zero turning it into NaN.
        /// </summary>
        private static void UpdateCenters(double[,] data, int[] labels, double[,] centers)
        {
            int rowCount = data.GetLength(0);
            int columnCount = data.GetLength(1);
            int clusterCount = centers.GetLength(0);

            double[,] sums = new double[clusterCount, columnCount];
            int[] counts = new int[clusterCount];

            for (int i = 0; i < rowCount; i++)
            {
                int c = labels[i];
                counts[c]++;
                for (int j = 0; j < columnCount; j++)
                {
                    sums[c, j] += data[i, j];
                }
            }

            for (int c = 0; c < clusterCount; c++)
            {
                if (counts[c] == 0)
                {
                    continue;
                }

                for (int j = 0; j < columnCount; j++)
                {
                    centers[c, j] = sums[c, j] / counts[c];
                }
            }
        }

        /// <summary>
        /// Reads a comma-separated text file and returns its non-blank lines,
        /// each split on ','.
        /// </summary>
        /// <param name="PathName">Path of the file to read.</param>
        /// <returns>
        /// One string array per non-blank line, in file order. Empty and
        /// whitespace-only lines are skipped.
        /// </returns>
        public static List<string[]> ReadCsv(string PathName)
        {
            List<string[]> lsFile = new List<string[]>();

            // 'using' closes the stream and the reader even when reading throws.
            using (FileStream fs = new FileStream(PathName, FileMode.Open, FileAccess.Read))
            using (StreamReader reader = new StreamReader(fs, Encoding.Default))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Trim().Length > 0)
                    {
                        lsFile.Add(line.Split(','));
                    }
                }
            }

            return lsFile;
        }
    }
}

Results:
Here Insert Picture Description
The accuracy still needs to be improved.

Guess you like

Origin blog.csdn.net/DOUBLE121PIG/article/details/89327668