Data structure - heap implementation

Table of contents:

1. Implementation of heap

1.1 Definition of heap

A heap is a special data structure. Physically it is usually stored as an array; logically it can be viewed as a complete binary tree. It is often used to find the maximum (or minimum) value in a data set that changes frequently (many insertions, deletions, and updates). A heap whose root node is the largest is called a max heap (or large-root heap), so the maximum value in the heap is the value of the root node; a heap whose root node is the smallest is called a min heap (or small-root heap), so the minimum value in the heap is the value of the root node.

A heap is not necessarily a complete binary tree; we usually represent it as one simply because that makes storage and indexing convenient.
Binary heap: an array that can be regarded as a nearly complete binary tree.

The maximum heap and the minimum heap are as shown in the figure:
Insert image description here

Max heap: the value of the root node is no smaller than the values of its left and right child nodes, each child in turn is no smaller than its own children, and so on recursively. Min heap: the value of the root node is no larger than the values of its left and right child nodes, each child in turn is no larger than its own children, and so on recursively.

1.2 Implementation of heap

Implement a heap using an array

1.2.1 Various interfaces of the heap

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
typedef int HPDataType; // element type stored in the heap
typedef struct Heap
{
    
    
	HPDataType* _a;// dynamically allocated array that holds the elements
	int _size;// number of elements stored; also the index of the next free slot
	int _capacity;// number of elements the dynamic array can currently hold
}Heap;
// Initialize the heap (allocates the backing array).
void HeapInit(Heap* hp);
// Destroy the heap (frees the backing array and resets the fields).
void HeapDestory(Heap* hp);
// Insert x into the heap.
void HeapPush(Heap* hp, HPDataType x);
// Remove the element at the top of the heap.
void HeapPop(Heap* hp);
// Return the element at the top of the heap.
// The return type is spelled out: a declaration without one relies on
// "implicit int", which is not valid C since C99.
HPDataType HPDataTypeHeapTop(Heap* hp);
// Return the number of elements in the heap.
int HeapSize(Heap* hp);
// Return non-zero when the heap holds no elements, 0 otherwise.
int HeapEmpty(Heap* hp);

1.2.2 Heap upward adjustment

// Sift-up for a max heap.
//   a     - heap array; every element other than a[child] must already
//           satisfy the max-heap order
//   child - index of the element to move up. It is an array index, so it is
//           declared int rather than the element type HPDataType.
// The element is swapped with its parent until it reaches the root or its
// parent is not smaller than it.
void HeapJustUp(HPDataType a[], int child)
{
    while (child > 0)
    {
        int parent = (child - 1) / 2; // index of the parent node

        if (!(a[parent] < a[child]))
            break; // parent is not smaller than the child: order holds

        // The temporary uses the element type so no value is truncated
        // if HPDataType is ever changed.
        HPDataType tmp = a[child];
        a[child] = a[parent];
        a[parent] = tmp;

        child = parent;
    }
}

For upward adjustment, the data is physically an array but is treated logically as a complete binary tree. By adjusting upward, the newly inserted element is moved into place so that the array is again a max heap. The upward adjustment algorithm has a precondition: apart from the element being inserted, the rest of the data must already form a max heap.

1.2.3 Downward adjustment of the heap

// Sift-down for a max heap, always starting from the root (index 0).
// The root is repeatedly swapped with its larger child until it has no
// children inside the heap or neither child is larger than it.
void HeapJustDown(Heap* hp)
{
    int node = 0; // index of the element being moved down

    for (;;)
    {
        int biggest = node * 2 + 1; // start with the left child
        if (biggest >= hp->_size)
            return; // no children inside the heap

        // Switch to the right child when it exists and is the larger one.
        int right = biggest + 1;
        if (right < hp->_size && hp->_a[biggest] < hp->_a[right])
            biggest = right;

        if (!(hp->_a[biggest] > hp->_a[node]))
            return; // the larger child is not bigger than the parent: done

        int held = hp->_a[node];
        hp->_a[node] = hp->_a[biggest];
        hp->_a[biggest] = held;
        node = biggest;
    }
}

For downward adjustment, the data is physically an array but is treated logically as a complete binary tree. Starting from the root node, the downward adjustment algorithm turns the array into a max heap. The algorithm has a precondition: the left and right subtrees of the root must each already be a heap.

1.2.4 Heap definition declaration and initialization

1.Heap declaration

typedef int HPDataType; // element type stored in the heap
typedef struct Heap
{
    
    
	HPDataType* _a;// dynamically allocated array that holds the elements
	int _size;// number of elements stored; also the index of the next free slot
	int _capacity;// number of elements the dynamic array can currently hold
}Heap;

Define a structure that wraps a dynamically allocated array.

2. Initialization of the heap

// Initialize a heap: allocate room for 4 elements and mark it empty.
// Prints a message and terminates the program if the allocation fails.
void HeapInit(Heap* hp)
{
    const int initialCapacity = 4;

    hp->_a = (HPDataType*)malloc(initialCapacity * sizeof(HPDataType));
    if (!hp->_a)
    {
        printf("malloc is error\n");
        exit(-1);
    }

    hp->_size = 0;
    hp->_capacity = initialCapacity;
}

Allocate the storage and initialize the fields.

1.2.5 Heap data processing

1. Insertion into the heap

// Insert x into the max heap.
// The backing array is grown when it is full, x is appended at the end, and
// the new element is then sifted up into place.
// Prints a message and terminates the program if the reallocation fails.
void HeapPush(Heap* hp, HPDataType x)
{
    assert(hp);

    // Full: grow the array.
    if (hp->_capacity == hp->_size)
    {
        // A capacity of 0 (for example after HeapDestory) cannot be doubled:
        // that would request 0 bytes and the store below would overflow.
        int newCapacity = hp->_capacity > 0 ? hp->_capacity * 2 : 4;
        HPDataType* tmp = (HPDataType*)realloc(hp->_a, sizeof(HPDataType) * (size_t)newCapacity);
        if (tmp == NULL)
        {
            printf("realloc is error");
            exit(-1);
        }
        hp->_a = tmp;
        hp->_capacity = newCapacity;
    }

    // Append at the tail, then restore the max-heap order from there.
    hp->_a[hp->_size] = x;
    hp->_size++;
    HeapJustUp(hp->_a, hp->_size - 1);
}

1. If the capacity is not enough, expand it
2. If the capacity is sufficient, insert data
3. Then adjust upward until the max-heap property is satisfied again

2. Deletion of heap

// Remove the element at the top of the heap.
// The heap must be non-empty (checked with assert).
void HeapPop(Heap* hp)
{
    assert(hp);              // hp must not be NULL
    assert(!HeapEmpty(hp));  // there must be something to remove

    // Swap the first and last elements; shrinking the size drops the old top.
    int last = hp->_size - 1;
    HPDataType oldTop = hp->_a[0];
    hp->_a[0] = hp->_a[last];
    hp->_a[last] = oldTop;
    hp->_size = last;

    // The element now at the root is sifted down into place.
    HeapJustDown(hp);
}

1. Approach 1: delete the top element by shifting the remaining elements forward over it, then rebuild the heap.
2. Approach 2 (used here): keep the parent-child relationships as intact as possible by swapping the first and last elements, deleting the tail element, and then adjusting the heap downward.

3. Get the top data of the heap

// 取堆顶的数据
HPDataTypeHeapTop(Heap* hp)
{
    
    
    assert(hp->_a);
    assert(!HeapEmpty(hp));//断言如果不是空为真就执行
    return hp->_a[0];
}

The top data of the heap is the first element

1.2.6 Heap empty judgment, number of data in the heap and heap destruction

1. Number of data in the heap

// Report how many elements the heap currently holds.
int HeapSize(Heap* hp)
{
    assert(hp);

    int count = hp->_size;
    return count;
}

The number of elements in the heap is the value of _size

2. The empty judgment of the heap

// Report whether the heap is empty: 1 when it holds no elements, else 0.
int HeapEmpty(Heap* hp)
{
    assert(hp);

    if (hp->_size != 0)
        return 0;
    return 1;
}

_size is 0, indicating that the heap is empty

3. Heap destruction

// Release the heap's backing array and reset every field.
void HeapDestory(Heap* hp)
{
    assert(hp);

    free(hp->_a);
    hp->_size = 0;
    hp->_capacity = 0;
    hp->_a = NULL; // do not keep a pointer to freed memory
}

Space allocated with malloc must be released with free before the program ends.

1.2.7 Code implementation of heap

.h header file (declaration)

#pragma once
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
typedef int HPDataType; // element type stored in the heap
typedef struct Heap
{
    
    
	HPDataType* _a;// dynamically allocated array that holds the elements
	int _size;// number of elements stored; also the index of the next free slot
	int _capacity;// number of elements the dynamic array can currently hold
}Heap;
// Initialize the heap (allocates the backing array).
void HeapInit(Heap* hp);
// Destroy the heap (frees the backing array and resets the fields).
void HeapDestory(Heap* hp);
// Insert x into the heap.
void HeapPush(Heap* hp, HPDataType x);
// Remove the element at the top of the heap.
void HeapPop(Heap* hp);
// Return the element at the top of the heap.
// The return type is spelled out: a declaration without one relies on
// "implicit int", which is not valid C since C99.
HPDataType HPDataTypeHeapTop(Heap* hp);
// Return the number of elements in the heap.
int HeapSize(Heap* hp);
// Return non-zero when the heap holds no elements, 0 otherwise.
int HeapEmpty(Heap* hp);

.c source file (definition)

#include "Heap.h"
// Initialize a heap: allocate room for 4 elements and mark it empty.
// Prints a message and terminates the program if the allocation fails.
void HeapInit(Heap* hp)
{
    const int initialCapacity = 4;

    hp->_a = (HPDataType*)malloc(initialCapacity * sizeof(HPDataType));
    if (!hp->_a)
    {
        printf("malloc is error\n");
        exit(-1);
    }

    hp->_size = 0;
    hp->_capacity = initialCapacity;
}
// Sift-up for a max heap.
//   a     - heap array; every element other than a[child] must already
//           satisfy the max-heap order
//   child - index of the element to move up. It is an array index, so it is
//           declared int rather than the element type HPDataType.
// The element is swapped with its parent until it reaches the root or its
// parent is not smaller than it.
// The function returns nothing, so it is declared void explicitly instead
// of relying on "implicit int" (not valid C since C99).
void HeapJustUp(HPDataType a[], int child)
{
    while (child > 0)
    {
        int parent = (child - 1) / 2; // index of the parent node

        if (!(a[parent] < a[child]))
            break; // parent is not smaller than the child: order holds

        // The temporary uses the element type so no value is truncated
        // if HPDataType is ever changed.
        HPDataType tmp = a[child];
        a[child] = a[parent];
        a[parent] = tmp;

        child = parent;
    }
}
// Insert x into the max heap.
// The backing array is grown when it is full, x is appended at the end, and
// the new element is then sifted up into place.
// Prints a message and terminates the program if the reallocation fails.
void HeapPush(Heap* hp, HPDataType x)
{
    assert(hp);

    // Full: grow the array.
    if (hp->_capacity == hp->_size)
    {
        // A capacity of 0 (for example after HeapDestory) cannot be doubled:
        // that would request 0 bytes and the store below would overflow.
        int newCapacity = hp->_capacity > 0 ? hp->_capacity * 2 : 4;
        HPDataType* tmp = (HPDataType*)realloc(hp->_a, sizeof(HPDataType) * (size_t)newCapacity);
        if (tmp == NULL)
        {
            printf("realloc is error");
            exit(-1);
        }
        hp->_a = tmp;
        hp->_capacity = newCapacity;
    }

    // Append at the tail, then restore the max-heap order from there.
    hp->_a[hp->_size] = x;
    hp->_size++;
    HeapJustUp(hp->_a, hp->_size - 1);
}
// Report whether the heap is empty: 1 when it holds no elements, else 0.
int HeapEmpty(Heap* hp)
{
    assert(hp);

    if (hp->_size != 0)
        return 0;
    return 1;
}
// Sift-down for a max heap, always starting from the root (index 0).
// The root is repeatedly swapped with its larger child until it has no
// children inside the heap or neither child is larger than it.
void HeapJustDown(Heap* hp)
{
    int node = 0; // index of the element being moved down

    for (;;)
    {
        int biggest = node * 2 + 1; // start with the left child
        if (biggest >= hp->_size)
            return; // no children inside the heap

        // Switch to the right child when it exists and is the larger one.
        int right = biggest + 1;
        if (right < hp->_size && hp->_a[biggest] < hp->_a[right])
            biggest = right;

        if (!(hp->_a[biggest] > hp->_a[node]))
            return; // the larger child is not bigger than the parent: done

        int held = hp->_a[node];
        hp->_a[node] = hp->_a[biggest];
        hp->_a[biggest] = held;
        node = biggest;
    }
}
// Remove the element at the top of the heap.
// The heap must be non-empty (checked with assert).
void HeapPop(Heap* hp)
{
    assert(hp);              // hp must not be NULL
    assert(!HeapEmpty(hp));  // there must be something to remove

    // Swap the first and last elements; shrinking the size drops the old top.
    int last = hp->_size - 1;
    HPDataType oldTop = hp->_a[0];
    hp->_a[0] = hp->_a[last];
    hp->_a[last] = oldTop;
    hp->_size = last;

    // The element now at the root is sifted down into place.
    HeapJustDown(hp);
}
// 取堆顶的数据
HPDataTypeHeapTop(Heap* hp)
{
    
    
    assert(hp->_a);
    assert(!HeapEmpty(hp));//断言如果不是空为真就执行
    return hp->_a[0];
}
// Report how many elements the heap currently holds.
int HeapSize(Heap* hp)
{
    assert(hp);

    int count = hp->_size;
    return count;
}
// Release the heap's backing array and reset every field.
void HeapDestory(Heap* hp)
{
    assert(hp);

    free(hp->_a);
    hp->_size = 0;
    hp->_capacity = 0;
    hp->_a = NULL; // do not keep a pointer to freed memory
}

.c source file (test)

#include "Heap.h"
int main()
{
    
    
    Heap hp;
    HeapInit(&hp);//初始化
    HeapPush(&hp, 2);//插入数据
    HeapPush(&hp, 3);
    HeapPush(&hp, 4);
    HeapPush(&hp, 5);
    HeapPush(&hp, 6);
    HeapPush(&hp, 1);
    HeapPush(&hp, 66);
    HeapPush(&hp, 62);
    HeapPush(&hp, 4);
    HeapPush(&hp, 6);
    HeapPop(&hp);//删除数据,从堆顶开始删
   int tmp= HPDataTypeHeapTop(&hp);//取堆顶元素
    // 堆的数据个数
   int num = HeapSize(&hp);
   printf("建大堆,栈顶元素为:%d,堆的数据个数:%d\n", tmp,num);
    for (int i = 0; i < num; i++)
        printf("%d ", hp._a[i]);
    HeapDestory(&hp);// 堆的销毁
    return 0;
}

2. TOP-K problem

TOP-K problem: find the k largest or the k smallest elements in a data set. Generally the data set is very large.
Examples: the top 10 in a profession, the world's top 500, the 100 most active players in a game, and rankings of all kinds.

1. Build a heap from the first K elements of the data set:
to find the K largest elements, build a min heap;
to find the K smallest elements, build a max heap.
2. Compare each of the remaining N-K elements with the top of the heap in turn, replacing the top (and re-adjusting the heap) whenever the rule is satisfied. Once all N-K elements have been compared, the K elements left in the heap are the K largest (or K smallest) elements required.

example:

Question: Assume there are 100 million numbers, too many to hold in memory. Find the k largest numbers in the file (here k = 10).
1. Read the first 10 numbers from the file and build a min heap from them in an in-memory array.
2. Read the remaining numbers one at a time and compare each with the top of the heap; if a number is larger than the top, replace the top with it and then adjust downward.
3. After all the data has been read, the heap holds the 10 largest numbers.

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define n 100000000
// Write n pseudo-random integers in the range 1..10000 to "top.txt", one per
// line (n is the file-level macro). Any existing file is overwritten.
// On an open, write, or close failure the error is reported with perror and
// the program exits.
void  WeinteData()
{
    FILE* fp = fopen("top.txt", "w"); // write-only, truncates
    if (fp == NULL)
    {
        perror("fopen error");
        exit(-1);
    }

    srand((unsigned)time(0));
    for (int i = 0; i < n; i++)
    {
        int x = rand() % 10000 + 1;
        // A failed write (for example a full disk) must not go unnoticed:
        // the reader would otherwise work on a truncated data set.
        if (fprintf(fp, "%d\n", x) < 0)
        {
            perror("fprintf error");
            fclose(fp);
            exit(-1);
        }
    }

    // fclose flushes the buffered output, so its result matters as well.
    if (fclose(fp) != 0)
    {
        perror("fclose error");
        exit(-1);
    }
}
// Exchange the two ints that p and q point to.
void Swap(int* p, int* q)
{
    const int fromP = *p;
    const int fromQ = *q;

    *p = fromQ;
    *q = fromP;
}
// Sift-down for a min heap.
//   arr    - heap array
//   k      - number of elements in the heap
//   parent - index of the element to move down
// The element is swapped with its smaller child until it has no children
// inside the heap or its smaller child is not less than it.
void JustDown(int* arr,int k,int parent)
{
    for (int child = parent * 2 + 1; child < k; child = parent * 2 + 1)
    {
        // Pick the right child when it exists and is the smaller one.
        int right = child + 1;
        if (right < k && arr[right] < arr[child])
            child = right;

        // The smaller child is not less than the parent: order holds.
        if (!(arr[child] < arr[parent]))
            return;

        Swap(&arr[child], &arr[parent]);
        parent = child; // continue from the position the element moved to
    }
}
// Turn the first k elements of arr into a min heap.
// Sifts down every node from the parent of the last element back to the root.
void HeapCreate(int* arr,int k)
{
    int node = (k - 2) / 2; // parent of the last element

    while (node >= 0)
    {
        JustDown(arr, k, node);
        node--;
    }
}
// Read integers from "top.txt" and print the 10 largest ones.
// The first k numbers seed a min heap. Every later number that is larger than
// the heap's root replaces the root and is sifted down, so the heap always
// holds the k largest numbers seen so far.
// On allocation or open failure the error is reported with perror and the
// program exits.
void  FileTakeK()
{
    int k = 10; // how many of the largest numbers to keep
    int* a = (int*)malloc(sizeof(int) * k); // storage for the heap
    if (a == NULL)
    {
        perror("malloc error:");
        exit(-1);
    }
    FILE* file = fopen("top.txt", "r"); // read-only
    if (file == NULL)
    {
        perror("fopen error:");
        free(a);
        exit(-1);
    }

    // Seed the heap. Only successfully parsed numbers are counted, so a file
    // with fewer than k numbers never leaves uninitialized slots in the heap.
    int count = 0;
    while (count < k && fscanf(file, "%d", &a[count]) == 1)
        count++;

    printf("前10个数:\n");
    for (int i = 0; i < count; i++)
        printf("%d ", a[i]);

    // Build the min heap.
    HeapCreate(a, count);
    printf("\n建完小堆里面的数:\n");
    for (int i = 0; i < count; i++)
        printf("%d ", a[i]);

    // Compare the remaining numbers with the root of the min heap.
    // Fewer than k seeds means the input is already exhausted (or malformed).
    if (count == k)
    {
        int x = 0;
        // "== 1" rather than "!= EOF": fscanf returns 0 on a token it cannot
        // parse and does not consume it, which would loop forever.
        while (fscanf(file, "%d", &x) == 1)
        {
            // Only a number larger than the root changes the heap, so the
            // sift-down is needed only in that case.
            if (x > a[0])
            {
                a[0] = x;
                JustDown(a, k, 0);
            }
        }
    }

    printf("\n取最大的10个数:\n");
    for (int i = 0; i < count; i++)
        printf("%d ", a[i]);

    free(a);
    fclose(file);
}
// TOP-K demo entry point.
int main()
{
    // Step 1: generate the data file.
    WeinteData();

    // Step 2: read the file back and print its 10 largest numbers.
    FileTakeK();

    return 0;
}

Guess you like

Origin blog.csdn.net/plj521/article/details/134414584