Internal sorting of data structures (algorithm introduction, ultra-detailed C++ code explanation and algorithm evaluation)

basic concept

Sorting is the key to efficient searching.

Criteria for evaluating sorting algorithms

Execution time: measured by the number of key comparisons and the number of record moves performed while the algorithm runs.

Required auxiliary space : If the auxiliary space required by a sorting algorithm does not depend on the size of the problem n, that is, the space complexity is O(1), it is called in-place sorting, otherwise it is non-in-place sorting.

Stability of the algorithm: suppose the record sequence contains two or more records with equal keys, Ki = Kj, and Ri precedes Rj before sorting. If Ri still precedes Rj in the sorted sequence, the sorting method is called stable; otherwise it is unstable.

Internal sorting : All records can be stored in memory for sorting

External sorting: the records cannot all be held in memory at once, so data must be exchanged between internal memory and external storage during the sorting process.

direct insertion sort

insertion sort

Idea: insert the record Ri to be sorted into the already sorted list R1, R2, ..., Ri-1, obtaining a new ordered list with one more record; repeat until all records have been inserted.

#include <iostream>
using namespace std;
#define Max_SIZE 100
typedef int keyType;
typedef int infoType; 
// One sortable record: the key that the sort routines compare, plus a payload field.
struct RecType
{
    keyType  key;         // sort key
    infoType othierinfo;  // extra data carried along with the key (identifier kept as spelled in this file)
};

// Fixed-capacity sequential list of records.
// The sort routines in this file address the data as R[1..length]; slot R[0]
// is left free and is used by some of them as a sentinel / temporary.
struct Sqlist
{
    RecType R[Max_SIZE];  // record storage
    int length;           // number of records currently stored
};

// Straight insertion sort: orders L.R[1..L.length] by ascending key, in place.
//
// R[1..pos-1] is always sorted; each iteration inserts R[pos] into that prefix.
// L.R[0] is overwritten: it holds the record being inserted and also acts as a
// sentinel, so the backward scan stops at index 0 without an explicit bounds test.
// Only records with a strictly greater key are shifted, so equal keys keep
// their original relative order.
void InsertSort(Sqlist &L)
{
    for (int pos = 2; pos <= L.length; ++pos)
    {
        // Already in order relative to the sorted prefix: nothing to do.
        if (!(L.R[pos].key < L.R[pos - 1].key))
            continue;

        L.R[0] = L.R[pos];          // save the record to insert (and arm the sentinel)

        // Slide the "hole" left, shifting larger records one slot to the right.
        int hole = pos;
        do
        {
            L.R[hole] = L.R[hole - 1];
            --hole;
        } while (L.R[0].key < L.R[hole - 1].key);

        L.R[hole] = L.R[0];         // drop the saved record into its final slot
    }
}

In the best case (the input is already sorted), no records need to be moved and the number of key comparisons is n-1.

In the worst case (the input is in reverse order), the number of key comparisons is about n^2/2, and the number of record moves is also about n^2/2.

The time complexity is O(n^2), and direct insertion is a stable sorting method.

Binary insertion sort

Similar to direct insertion sort, a sequence will also be divided into two parts, sorted and unsorted. The difference is that when finding the appropriate position, a binary search is used

#include <iostream>
using namespace std;
#define Max_SIZE 100
typedef int keyType;
typedef int infoType; 
// One sortable record: the key that the sort routines compare, plus a payload field.
struct RecType
{
    keyType  key;         // sort key
    infoType othierinfo;  // extra data carried along with the key (identifier kept as spelled in this file)
};

// Fixed-capacity sequential list of records.
// The sort routines in this file address the data as R[1..length]; slot R[0]
// is left free and is used by some of them as a sentinel / temporary.
struct Sqlist
{
    RecType R[Max_SIZE];  // record storage
    int length;           // number of records currently stored
};

// Binary insertion sort: orders L.R[1..L.length] by ascending key, in place.
//
// Same scheme as straight insertion sort, but the insertion position inside
// the sorted prefix R[1..cur-1] is located with a binary search.
// L.R[0] is overwritten with the record currently being inserted.
// On equal keys the search continues to the right, so a record is placed
// after existing records with the same key.
void BInsertSort(Sqlist &L)
{
    for (int cur = 2; cur <= L.length; ++cur)
    {
        L.R[0] = L.R[cur];              // record to insert; R[cur] is now a free slot

        // Binary search over the sorted prefix R[1..cur-1].
        int left = 1;
        int right = cur - 1;
        while (left <= right)
        {
            const int middle = (left + right) / 2;   // integer division rounds down
            if (L.R[0].key < L.R[middle].key)
                right = middle - 1;
            else
                left = middle + 1;
        }

        // The loop always ends with left == right + 1; that is the insertion slot.
        for (int src = cur - 1; src >= left; --src)
            L.R[src + 1] = L.R[src];    // shift the tail one slot to the right
        L.R[left] = L.R[0];
    }
}

When inserting the i-th record, $\lfloor \log_2(i-1) \rfloor + 1$ key comparisons are needed to determine where it should be inserted.

Time complexity: O(n^2) (binary search reduces the number of comparisons, but the number of record moves is unchanged)

Binary insertion sort is a stable sorting method

Shell sort

First take a positive integer d1 (d1 < n) as the first increment and divide all n records into d1 groups, putting all records whose positions are d1 apart into the same group; that is, for each k (k = 1, 2, ..., d1), R[k], R[d1+k], R[2·d1+k], ... are placed in the same group, and direct insertion sort is performed within each group. One such grouping-and-sorting pass is called one pass of Shell sort;

Then take a new increment d2 < d1 and repeat the grouping and sorting of the previous step; continue until the increment di = 1 is reached, at which point all records are in a single group and are sorted.

// One pass of Shell sort with increment Gap: insertion-sorts, in place, every
// subsequence of L.R[1..L.length] whose indices are Gap apart.
// With Gap == 1 this is a plain insertion sort.
// L.R[0] is overwritten; it is used only as a temporary for the record being
// inserted (not as a sentinel, hence the explicit index test in the scan).
void ShellInsert(Sqlist &L,int Gap)
{
    for (int cur = Gap + 1; cur <= L.length; ++cur)
    {
        // Not smaller than its predecessor in the same group: already in place.
        if (L.R[cur].key >= L.R[cur - Gap].key)
            continue;

        L.R[0] = L.R[cur];              // stash the record to insert

        // Walk back through the group, shifting larger records Gap slots forward.
        int slot = cur;
        while (slot - Gap > 0 && L.R[0].key < L.R[slot - Gap].key)
        {
            L.R[slot] = L.R[slot - Gap];
            slot -= Gap;
        }
        L.R[slot] = L.R[0];
    }
}
// Shell sort driver: runs one ShellInsert pass over L for each of the t
// increments in dlta[0..t-1], in array order.
// The increments are used exactly as supplied by the caller.
void ShellSort(Sqlist &L,int dlta[],int t)
{
    int pass = 0;
    while (pass < t)
    {
        ShellInsert(L, dlta[pass]);
        ++pass;
    }
}

Shell sort is an unstable sorting method

swap sort

Bubble Sort

Each pass of sorting can find the largest (smallest) number and put it at the last position, so it is called "bubble"

#include <iostream>
using namespace std;
#define Max_SIZE 100
typedef int keyType;
typedef int infoType; 
// One sortable record: the key that the sort routines compare, plus a payload field.
struct RecType
{
    keyType  key;         // sort key
    infoType othierinfo;  // extra data carried along with the key (identifier kept as spelled in this file)
};

// Fixed-capacity sequential list of records.
// The sort routines in this file address the data as R[1..length]; slot R[0]
// is left free and is used by some of them as a sentinel / temporary.
struct Sqlist
{
    RecType R[Max_SIZE];  // record storage
    int length;           // number of records currently stored
};

// Bubble sort: orders L.R[1..L.length] by ascending key, in place.
//
// Data is 1-based, so pass i (i = 1..length-1) compares the adjacent pairs
// (j, j+1) for j = 1..length-i and leaves the largest remaining record at
// index length-i+1. Adjacent records are swapped only when strictly out of
// order, so records with equal keys keep their relative order.
// The sort stops as soon as a pass makes no swap (the list is then sorted),
// which gives n-1 comparisons and no moves on already sorted input.
void BollenSort(Sqlist &L)
{
    RecType temp;
    for (int i = 1; i < L.length; ++i)
    {
        bool swapped = false;
        // Upper bound is length-i inclusive: the last pair compared is
        // (length-i, length-i+1). The previous bound `j < length-i-1`
        // never reached the tail of the list.
        for (int j = 1; j <= L.length - i; ++j)
        {
            if (L.R[j].key > L.R[j + 1].key)
            {
                temp = L.R[j];
                L.R[j] = L.R[j + 1];
                L.R[j + 1] = temp;
                swapped = true;
            }
        }
        if (!swapped)
            break;      // no exchange in this pass: already sorted
    }
}

Best case: the input is already in ascending order; only n-1 key comparisons are performed, and no data is moved.

Worst case: the input is in reverse order; n-1 passes are needed, with n(n-1)/2 key comparisons in total.

Time complexity: O(n^2)

Bubble sort is a stable sorting method

Quick sort

One pass of sorting divides the records to be sorted into two independent parts such that the keys of all records in one part are smaller than the keys of all records in the other part. Each of the two parts is then sorted in the same way, until the entire sequence is in order.

Assume that the record sequence to be sorted is R[s...t]. Pick any record in the sequence (usually R[s]) as the reference (base, pivot) and use its key as the pivot key. Rearrange all the remaining records so that every record whose key is smaller than the pivot key is placed before the pivot, and every record whose key is greater than the pivot key is placed after the pivot.

The sequence is scanned alternately from both ends. Scanning from back to front, the first record whose key is less than the pivot key is moved to the front part of the sequence; scanning from front to back, the first record whose key is greater than the pivot key is moved to the back part of the sequence. This is repeated until all records have been scanned.

#include <iostream>
using namespace std;
#define Max_SIZE 100
typedef int keyType;
typedef int infoType; 
// One sortable record: the key that the sort routines compare, plus a payload field.
struct RecType
{
    keyType  key;         // sort key
    infoType othierinfo;  // extra data carried along with the key (identifier kept as spelled in this file)
};

// Fixed-capacity sequential list of records.
// The sort routines in this file address the data as R[1..length]; slot R[0]
// is left free and is used by some of them as a sentinel / temporary.
struct Sqlist
{
    RecType R[Max_SIZE];  // record storage
    int length;           // number of records currently stored
};
// Quick sort, partition step.
//
// Partitions L.R[low..high] around the record initially at R[low] (the pivot)
// and returns the pivot's final index p. On return, every record left of p in
// the range has key <= pivot key and every record right of p has key >= pivot key.
// L.R[0] is overwritten: it holds the pivot record while the range is rearranged.
int Partition(Sqlist &L,int low,int high)
{
    L.R[0] = L.R[low];                  // lift the pivot out; R[low] becomes the free slot
    const int pivot = L.R[0].key;

    // low and high close in on each other; the free slot alternates between them.
    while (low < high)
    {
        // From the right: skip records that already belong on the right side.
        for (; low < high && L.R[high].key >= pivot; --high) {}
        L.R[low] = L.R[high];           // smaller record goes into the free slot on the left

        // From the left: skip records that already belong on the left side.
        for (; low < high && L.R[low].key <= pivot; ++low) {}
        L.R[high] = L.R[low];           // larger record goes into the free slot on the right
    }

    // low == high here, and that slot is the free one: the pivot's final position.
    L.R[low] = L.R[0];
    return low;
}

// Quick sort: orders L.R[low..high] by ascending key, in place.
// Call with low = first index and high = last index of the range to sort.
void QSort(Sqlist &L,int low,int high)
{
    // A range with fewer than two records is already sorted.
    if (low >= high)
        return;

    const int split = Partition(L, low, high);  // pivot is now at its final index
    QSort(L, low, split - 1);                   // records left of the pivot
    QSort(L, split + 1, high);                  // records right of the pivot
}

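Time complexity: O(nlog2n) on average; O(n^2) in the worst case (for example, already sorted input when the first record is always chosen as the pivot)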

Unstable sorting method

selection sort

The basic idea of selection sort is: on each pass, select the record with the smallest key from the records still to be sorted and exchange it with the first record of that unsorted part; repeat until the entire record sequence is in order.

Simple selection sort

Simple selection sort is also called direct selection sort. The basic operation is as follows

Pass 1: Select the record with the smallest keyword from 1~n records and exchange it with the first record

Pass 2: Select the record with the smallest keyword from 2~n records and exchange it with the second record

...

Pass n-1: Select the record with the smallest key from records n-1~n and exchange it with the (n-1)-th record

#include <iostream>
using namespace std;
#define Max_SIZE 100
typedef int keyType;
typedef int infoType; 
// One sortable record: the key that the sort routines compare, plus a payload field.
struct RecType
{
    keyType  key;         // sort key
    infoType othierinfo;  // extra data carried along with the key (identifier kept as spelled in this file)
};

// Fixed-capacity sequential list of records.
// The sort routines in this file address the data as R[1..length]; slot R[0]
// is left free and is used by some of them as a sentinel / temporary.
struct Sqlist
{
    RecType R[Max_SIZE];  // record storage
    int length;           // number of records currently stored
};
// Simple selection sort: orders L.R[1..L.length] by ascending key, in place.
//
// Pass i (i = 1..length-1) finds the record with the smallest key in
// R[i..length] and exchanges it with R[i]. R[0] is not touched.
void selectSort(Sqlist &L)
{
    RecType temp;
    for (int i = 1; i < L.length; ++i)
    {
        int minPos = i;     // index of the smallest key seen so far in R[i..length]
        for (int k = i + 1; k <= L.length; ++k)
        {
            // Compare against the running minimum, not against R[i]; comparing
            // with R[i] would pick the last smaller record instead of the smallest.
            if (L.R[k].key < L.R[minPos].key)
                minPos = k;
        }
        if (minPos != i)    // R[i] is not already the minimum: exchange
        {
            temp = L.R[i];
            L.R[i] = L.R[minPos];
            L.R[minPos] = temp;
        }
    }
}

Time complexity: O(n^2)

Space complexity: O(1)

tree selection sort

Image source: (13 messages) Tree selection sort_Xuemei’s blog-CSDN blog_Tree selection sort ,

Heap sort

Think of R[1...n] as the sequential storage structure of a complete binary tree. Using the intrinsic relationship between parent nodes and child nodes in the complete binary tree, select the record with the largest (or smallest) key in the current unordered area.

The heap is a complete binary tree with a sequential storage structure, and K1 is the root node.

The root node of the heap is the smallest (largest) value in the keyword sequence, which is called the small (large) root heap respectively.

The sequence of elements on the path from the root node to each leaf node is based on the element value (key value). It is non-increasing for a large root heap and non-decreasing for a small root heap.

Any subtree in the heap is also a heap

About the construction and sorting of heaps: [Algorithm] Sorting algorithm heap sorting - Zhihu (zhihu.com)

Average time complexity: O(nlogn)

Optimal time complexity: O(nlogn)

Worst time complexity: O(nlogn)

Stability: Unstable

merge sort

Merging two or more ordered sequences into one ordered sequence is easy to implement using a linear table, and its time complexity is O(m+n)

#include <iostream>
using namespace std;
#define Max_SIZE 100
typedef int keyType;
typedef int infoType; 
// One sortable record: the key that the sort routines compare, plus a payload field.
struct RecType
{
    keyType  key;         // sort key
    infoType othierinfo;  // extra data carried along with the key (identifier kept as spelled in this file)
};

// Fixed-capacity sequential list of records.
// The sort routines in this file address the data as R[1..length]; slot R[0]
// is left free and is used by some of them as a sentinel / temporary.
struct Sqlist
{
    RecType R[Max_SIZE];  // record storage
    int length;           // number of records currently stored
};

// Merges two adjacent sorted runs of SR into TR.
//
//   SR   : source array; SR[loc1..loc2] and SR[loc2+1..loc3] must each be
//          sorted by ascending key.
//   TR   : destination array; receives the merged, sorted result in TR[loc1..loc3].
//   loc1 : first index of the first run.
//   loc2 : last index of the first run (the second run starts at loc2+1).
//   loc3 : last index of the second run.
//
// SR and TR must be different arrays: the routine writes TR while it is still
// reading SR. On equal keys the record from the first run is taken first, so
// the merge preserves the relative order of equal keys.
void Merge(RecType SR[],RecType (&TR)[Max_SIZE],int loc1,int loc2,int loc3)
{
    int i = loc1;       // read cursor in the first run
    int j = loc2 + 1;   // read cursor in the second run
    int k = loc1;       // write cursor in TR

    // Take the smaller head until one run is exhausted. The first run ends at
    // loc2 inclusive, so the test must be i <= loc2.
    while (i <= loc2 && j <= loc3)
    {
        if (SR[i].key <= SR[j].key)
            TR[k++] = SR[i++];
        else
            TR[k++] = SR[j++];
    }

    // At most one of the two runs still has records; copy what is left.
    while (i <= loc2)
        TR[k++] = SR[i++];
    while (j <= loc3)
        TR[k++] = SR[j++];
}

// Recursive merge sort: sorts SR[s..t] by ascending key and stores the result
// in TR1[s..t].
//
// Each half is first sorted into a local scratch array and the two sorted
// halves are then merged from that scratch array into TR1. Because SR is only
// read before TR1[s..t] is written, SR and TR1 may be the same array.
// An empty range (s > t) is a no-op.
void MSort(RecType SR[],RecType (&TR1)[Max_SIZE],int s,int t)
{
    if (s > t)
        return;                 // empty range: nothing to sort

    if (s == t)
    {
        TR1[s] = SR[s];         // a single record is already sorted
        return;                 // must not fall through into the recursive case
    }

    const int m = (s + t) / 2;  // integer division rounds down, so s <= m < t

    RecType TR2[Max_SIZE];      // scratch space holding the two sorted halves
    MSort(SR, TR2, s, m);       // TR2[s..m]   = sorted SR[s..m]
    MSort(SR, TR2, m + 1, t);   // TR2[m+1..t] = sorted SR[m+1..t]
    Merge(TR2, TR1, s, m, t);   // merge the sorted halves into TR1[s..t]
}
// Merge sort entry point: sorts L.R[1..L.length] by handing the record array
// to MSort as both source and destination.
void MergeSort(Sqlist &L)
{
    // Zero or one record is already sorted; this also avoids calling MSort
    // with the invalid range [1, 0] for an empty list.
    if (L.length < 2)
        return;
    MSort(L.R, L.R, 1, L.length);
}

Sorting n records takes about log2n merge passes, and each merge pass has time complexity O(n). The time complexity of the entire merge sort is therefore O(nlog2n) in both the best and the worst case. The space complexity is O(n)

Merge sort is stable

Radix sort

Radix Sorting (Radix Sorting), also known as bucket sorting or numeric sorting: sorting by the components (or "bits") of the keywords of the records to be sorted.
Radix sorting is completely different from the previous internal sorting methods. It does not require comparison of keywords and movement of records. Sorting of single logical keywords is achieved with the help of multi-keyword sorting ideas.
Radix sorting achieves sorting through the process of "distribution" and "collection". It is a method of sorting single keywords with the help of the idea of multi-keyword sorting.

Summarize

Evaluation of each sorting algorithm

Code summary (runnable)

#include <iostream>
using namespace std;
#define Max_SIZE 100
typedef int keyType;
typedef int infoType; 
// One sortable record: the key that the sort routines compare, plus a payload field.
struct RecType
{
    keyType  key;         // sort key
    infoType othierinfo;  // extra data carried along with the key (identifier kept as spelled in this file)
};

// Fixed-capacity sequential list of records.
// The sort routines in this file address the data as R[1..length]; slot R[0]
// is left free and is used by some of them as a sentinel / temporary.
struct Sqlist
{
    RecType R[Max_SIZE];  // record storage
    int length;           // number of records currently stored
};

// Straight insertion sort: orders L.R[1..L.length] by ascending key, in place.
//
// R[1..pos-1] is always sorted; each iteration inserts R[pos] into that prefix.
// L.R[0] is overwritten: it holds the record being inserted and also acts as a
// sentinel, so the backward scan stops at index 0 without an explicit bounds test.
// Only records with a strictly greater key are shifted, so equal keys keep
// their original relative order.
void InsertSort(Sqlist &L)
{
    for (int pos = 2; pos <= L.length; ++pos)
    {
        // Already in order relative to the sorted prefix: nothing to do.
        if (!(L.R[pos].key < L.R[pos - 1].key))
            continue;

        L.R[0] = L.R[pos];          // save the record to insert (and arm the sentinel)

        // Slide the "hole" left, shifting larger records one slot to the right.
        int hole = pos;
        do
        {
            L.R[hole] = L.R[hole - 1];
            --hole;
        } while (L.R[0].key < L.R[hole - 1].key);

        L.R[hole] = L.R[0];         // drop the saved record into its final slot
    }
}

// Binary insertion sort: orders L.R[1..L.length] by ascending key, in place.
//
// Same scheme as straight insertion sort, but the insertion position inside
// the sorted prefix R[1..cur-1] is located with a binary search.
// L.R[0] is overwritten with the record currently being inserted.
// On equal keys the search continues to the right, so a record is placed
// after existing records with the same key.
void BInsertSort(Sqlist &L)
{
    for (int cur = 2; cur <= L.length; ++cur)
    {
        L.R[0] = L.R[cur];              // record to insert; R[cur] is now a free slot

        // Binary search over the sorted prefix R[1..cur-1].
        int left = 1;
        int right = cur - 1;
        while (left <= right)
        {
            const int middle = (left + right) / 2;   // integer division rounds down
            if (L.R[0].key < L.R[middle].key)
                right = middle - 1;
            else
                left = middle + 1;
        }

        // The loop always ends with left == right + 1; that is the insertion slot.
        for (int src = cur - 1; src >= left; --src)
            L.R[src + 1] = L.R[src];    // shift the tail one slot to the right
        L.R[left] = L.R[0];
    }
}


// One pass of Shell sort with increment Gap: insertion-sorts, in place, every
// subsequence of L.R[1..L.length] whose indices are Gap apart.
// With Gap == 1 this is a plain insertion sort.
// L.R[0] is overwritten; it is used only as a temporary for the record being
// inserted (not as a sentinel, hence the explicit index test in the scan).
void ShellInsert(Sqlist &L,int Gap)
{
    for (int cur = Gap + 1; cur <= L.length; ++cur)
    {
        // Not smaller than its predecessor in the same group: already in place.
        if (L.R[cur].key >= L.R[cur - Gap].key)
            continue;

        L.R[0] = L.R[cur];              // stash the record to insert

        // Walk back through the group, shifting larger records Gap slots forward.
        int slot = cur;
        while (slot - Gap > 0 && L.R[0].key < L.R[slot - Gap].key)
        {
            L.R[slot] = L.R[slot - Gap];
            slot -= Gap;
        }
        L.R[slot] = L.R[0];
    }
}
// Shell sort driver: runs one ShellInsert pass over L for each of the t
// increments in dlta[0..t-1], in array order.
// The increments are used exactly as supplied by the caller.
void ShellSort(Sqlist &L,int dlta[],int t)
{
    int pass = 0;
    while (pass < t)
    {
        ShellInsert(L, dlta[pass]);
        ++pass;
    }
}


// Bubble sort: orders L.R[1..L.length] by ascending key, in place.
//
// Data is 1-based, so pass i (i = 1..length-1) compares the adjacent pairs
// (j, j+1) for j = 1..length-i and leaves the largest remaining record at
// index length-i+1. Adjacent records are swapped only when strictly out of
// order, so records with equal keys keep their relative order.
// The sort stops as soon as a pass makes no swap (the list is then sorted),
// which gives n-1 comparisons and no moves on already sorted input.
void BollenSort(Sqlist &L)
{
    RecType temp;
    for (int i = 1; i < L.length; ++i)
    {
        bool swapped = false;
        // Upper bound is length-i inclusive: the last pair compared is
        // (length-i, length-i+1). The previous bound `j < length-i-1`
        // never reached the tail of the list.
        for (int j = 1; j <= L.length - i; ++j)
        {
            if (L.R[j].key > L.R[j + 1].key)
            {
                temp = L.R[j];
                L.R[j] = L.R[j + 1];
                L.R[j + 1] = temp;
                swapped = true;
            }
        }
        if (!swapped)
            break;      // no exchange in this pass: already sorted
    }
}
// Quick sort, partition step.
//
// Partitions L.R[low..high] around the record initially at R[low] (the pivot)
// and returns the pivot's final index p. On return, every record left of p in
// the range has key <= pivot key and every record right of p has key >= pivot key.
// L.R[0] is overwritten: it holds the pivot record while the range is rearranged.
int Partition(Sqlist &L,int low,int high)
{
    L.R[0] = L.R[low];                  // lift the pivot out; R[low] becomes the free slot
    const int pivot = L.R[0].key;

    // low and high close in on each other; the free slot alternates between them.
    while (low < high)
    {
        // From the right: skip records that already belong on the right side.
        for (; low < high && L.R[high].key >= pivot; --high) {}
        L.R[low] = L.R[high];           // smaller record goes into the free slot on the left

        // From the left: skip records that already belong on the left side.
        for (; low < high && L.R[low].key <= pivot; ++low) {}
        L.R[high] = L.R[low];           // larger record goes into the free slot on the right
    }

    // low == high here, and that slot is the free one: the pivot's final position.
    L.R[low] = L.R[0];
    return low;
}

// Quick sort: orders L.R[low..high] by ascending key, in place.
// Call with low = first index and high = last index of the range to sort.
void QSort(Sqlist &L,int low,int high)
{
    // A range with fewer than two records is already sorted.
    if (low >= high)
        return;

    const int split = Partition(L, low, high);  // pivot is now at its final index
    QSort(L, low, split - 1);                   // records left of the pivot
    QSort(L, split + 1, high);                  // records right of the pivot
}

// Simple selection sort: orders L.R[1..L.length] by ascending key, in place.
//
// Pass i (i = 1..length-1) finds the record with the smallest key in
// R[i..length] and exchanges it with R[i]. R[0] is not touched.
void selectSort(Sqlist &L)
{
    RecType temp;
    for (int i = 1; i < L.length; ++i)
    {
        int minPos = i;     // index of the smallest key seen so far in R[i..length]
        for (int k = i + 1; k <= L.length; ++k)
        {
            // Compare against the running minimum, not against R[i]; comparing
            // with R[i] would pick the last smaller record instead of the smallest.
            if (L.R[k].key < L.R[minPos].key)
                minPos = k;
        }
        if (minPos != i)    // R[i] is not already the minimum: exchange
        {
            temp = L.R[i];
            L.R[i] = L.R[minPos];
            L.R[minPos] = temp;
        }
    }
}


// Merges two adjacent sorted runs of SR into TR.
//
//   SR   : source array; SR[loc1..loc2] and SR[loc2+1..loc3] must each be
//          sorted by ascending key.
//   TR   : destination array; receives the merged, sorted result in TR[loc1..loc3].
//   loc1 : first index of the first run.
//   loc2 : last index of the first run (the second run starts at loc2+1).
//   loc3 : last index of the second run.
//
// SR and TR must be different arrays: the routine writes TR while it is still
// reading SR. On equal keys the record from the first run is taken first, so
// the merge preserves the relative order of equal keys.
void Merge(RecType SR[],RecType (&TR)[Max_SIZE],int loc1,int loc2,int loc3)
{
    int i = loc1;       // read cursor in the first run
    int j = loc2 + 1;   // read cursor in the second run
    int k = loc1;       // write cursor in TR

    // Take the smaller head until one run is exhausted. The first run ends at
    // loc2 inclusive, so the test must be i <= loc2.
    while (i <= loc2 && j <= loc3)
    {
        if (SR[i].key <= SR[j].key)
            TR[k++] = SR[i++];
        else
            TR[k++] = SR[j++];
    }

    // At most one of the two runs still has records; copy what is left.
    while (i <= loc2)
        TR[k++] = SR[i++];
    while (j <= loc3)
        TR[k++] = SR[j++];
}

// Recursive merge sort: sorts SR[s..t] by ascending key and stores the result
// in TR1[s..t].
//
// Each half is first sorted into a local scratch array and the two sorted
// halves are then merged from that scratch array into TR1. Because SR is only
// read before TR1[s..t] is written, SR and TR1 may be the same array.
// An empty range (s > t) is a no-op.
void MSort(RecType SR[],RecType (&TR1)[Max_SIZE],int s,int t)
{
    if (s > t)
        return;                 // empty range: nothing to sort

    if (s == t)
    {
        TR1[s] = SR[s];         // a single record is already sorted
        return;                 // must not fall through into the recursive case
    }

    const int m = (s + t) / 2;  // integer division rounds down, so s <= m < t

    RecType TR2[Max_SIZE];      // scratch space holding the two sorted halves
    MSort(SR, TR2, s, m);       // TR2[s..m]   = sorted SR[s..m]
    MSort(SR, TR2, m + 1, t);   // TR2[m+1..t] = sorted SR[m+1..t]
    Merge(TR2, TR1, s, m, t);   // merge the sorted halves into TR1[s..t]
}
// Merge sort entry point: sorts L.R[1..L.length] by handing the record array
// to MSort as both source and destination.
void MergeSort(Sqlist &L)
{
    // Zero or one record is already sorted; this also avoids calling MSort
    // with the invalid range [1, 0] for an empty list.
    if (L.length < 2)
        return;
    MSort(L.R, L.R, 1, L.length);
}


// Prints the list to standard output: a header line with the record count,
// then one line with the keys of R[1..length] and one line with their
// othierinfo fields, each value followed by a single space.
void showSqlist(Sqlist& L)
{
    cout << "Sqlist size:" << L.length << endl;

    // Line 1: the keys, in list order.
    int idx = 1;
    while (idx <= L.length)
    {
        cout << L.R[idx].key << " ";
        ++idx;
    }
    cout << endl;

    // Line 2: the payload fields, in the same order.
    idx = 1;
    while (idx <= L.length)
    {
        cout << L.R[idx].othierinfo << " ";
        ++idx;
    }
    cout << endl;
}

// Demo driver: builds a 7-record list, prints it, sorts it with BollenSort
// and prints it again.
int main()
{
    // keys[0] fills slot R[0], which is not part of the data; the seven
    // records to sort are keys[1..7], stored in R[1..7].
    const int keys[] = {886, 3, 5, 4, 7, 2, 1, 9};
    const int slots = sizeof(keys) / sizeof(keys[0]);

    Sqlist testlist = {};               // zero-initialise every record and length
    for (int slot = 0; slot < slots; ++slot)
    {
        testlist.R[slot].key = keys[slot];
        testlist.R[slot].othierinfo = 1;
    }
    testlist.length = slots - 1;        // R[0] does not count as a record

    showSqlist(testlist);

    // Another sort can be substituted here, e.g. ShellSort with the
    // increments {5,3,1} (int dltaa[3] = {5,3,1}; ShellSort(testlist,dltaa,3);).
    BollenSort(testlist);

    showSqlist(testlist);
    return 0;
}