PTA Huffman Codes --- Using a min-heap to build a Huffman tree

Huffman Codes

Although this code is not conceptually difficult, it is tedious to write. It took me more than 4 hours...
(That is my own fault: I am still not proficient at writing code.)

  • Relevant information
  1. Title: Huffman Codes
  2. Explanation: the data structures MOOC taught by Chen Yue ("Grandma Chen Yue")
  • Although the idea is simple, it is really not easy to implement, and the lecture video only gives the idea; the actual code has to be written by yourself. Of course, the data structures course also has a number of implementation videos that provide code.
  • But if you look carefully, you will find that the min-heap in this problem has to be built directly on tree nodes, not on plain numbers as in the videos.

Header files

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

Definition of structure

  1. Min-heap structure
  2. Huffman tree structure
/* Weight stored in the sentinel node at heap index 0; it must be smaller than
 * every weight that is ever inserted into the heap. */
#define MINDATA -1
typedef struct TreeNode* HuffmanTree;
typedef struct HNode* Heap; /* heap handle type */
typedef Heap MinHeap; /* min-heap */
// The heap is implemented with an array; the Huffman tree is a linked tree.
struct HNode {
	/* Array-backed binary heap of tree-node pointers. Slot 0 of Data holds
	 * the sentinel; the heap elements live in slots 1..Size. */
	HuffmanTree* Data;  /* array holding the elements */
	int Size;          /* number of elements currently in the heap */
	int Capacity;      /* maximum number of elements the heap can hold */
};
struct TreeNode {
	/* Binary tree node. In the Huffman tree Weight is a frequency (or the sum
	 * of the children's weights); in a student's code tree it is a 0/1 flag. */
	int Weight;
	HuffmanTree Left;
	HuffmanTree Right;
};

Building the min-heap

I have written a separate post about building a max-heap; if you are not familiar with heaps, please refer to it.

  1. CreateHeap creates a heap
  2. Insert inserts a node into the heap
  3. ReadData reads some numbers, turns them into nodes and saves their corresponding frequencies
  4. DeleteMin remove the smallest node from the heap
  5. BuildMinHeap heaps the nodes that have been obtained and arranges them into a heap
/*
 * CreateHeap - allocate an empty min-heap that can hold Size elements.
 *
 * The Data array has Size + 1 slots and every slot is pre-populated with a
 * freshly allocated, zeroed node (Weight 0, no children), so callers may fill
 * slots 1..Size in place. Slot 0 is the sentinel: its Weight is MINDATA, which
 * stops the sift-up loop in Insert without an explicit index check.
 *
 * Returns the new heap, or NULL when Size is negative or an allocation fails
 * (everything allocated so far is released before returning).
 */
MinHeap CreateHeap(int Size)
{
	if (Size < 0) {
		return NULL;
	}

	MinHeap H = (MinHeap)malloc(sizeof(struct HNode));
	if (H == NULL) {
		return NULL;
	}

	H->Data = (HuffmanTree*)malloc(((size_t)Size + 1) * sizeof(HuffmanTree));
	if (H->Data == NULL) {
		free(H);
		return NULL;
	}

	for (int i = 0; i <= Size; i++) {
		H->Data[i] = (HuffmanTree)malloc(sizeof(struct TreeNode));
		if (H->Data[i] == NULL) {
			/* Roll back the nodes allocated before the failure. */
			while (i-- > 0) {
				free(H->Data[i]);
			}
			free(H->Data);
			free(H);
			return NULL;
		}
		/* Never leave indeterminate fields behind: tree walks test the
		 * child pointers against NULL. */
		H->Data[i]->Weight = 0;
		H->Data[i]->Left = NULL;
		H->Data[i]->Right = NULL;
	}

	H->Size = 0;
	H->Capacity = Size;
	H->Data[0]->Weight = MINDATA;	/* sentinel: smaller than any real weight */
	return H;
}

/* IsFull - report whether the heap already holds Capacity elements. */
bool IsFull(MinHeap H)
{
	if (H->Size == H->Capacity) {
		return true;
	}
	return false;
}

/*
 * Insert - add node T to min-heap H, ordered by Weight.
 *
 * Relies on H->Data[0] being the sentinel, whose Weight ends the sift-up at
 * the root. Prints "FULL" and returns false when the heap has no room;
 * returns true once T has been placed.
 */
bool Insert(MinHeap H, HuffmanTree T)
{
	if (IsFull(H)) {
		printf("FULL");
		return false;
	}

	/* Open a hole at the new last position and move it towards the root,
	 * pulling each heavier parent down into it. */
	H->Size += 1;
	int hole = H->Size;
	while (T->Weight < H->Data[hole / 2]->Weight) {
		H->Data[hole] = H->Data[hole / 2];
		hole /= 2;
	}

	H->Data[hole] = T;
	return true;
}

/*
 * ReadData - read up to N "character frequency" pairs from stdin.
 *
 * For the i-th pair (1-based) the frequency is stored in fre[i - 1] and in the
 * pre-allocated node H->Data[i], which becomes a leaf (no children); H->Size
 * is incremented once per pair. The nodes are NOT heap-ordered afterwards.
 *
 * Reading stops early when a pair cannot be parsed or when the heap's
 * Capacity is reached, so no node outside the Data array is touched and no
 * uninitialised value is stored.
 *
 * Returns H.
 */
MinHeap ReadData(MinHeap H, int N, int* fre) {
	for (int i = 1; i <= N && i <= H->Capacity; i++) {
		char c;
		int f;

		/* The leading blank skips any whitespace (spaces, '\n', "\r\n")
		 * left over from the previous item before the character is read. */
		if (scanf(" %c %d", &c, &f) != 2) {
			break;
		}
		(void)c;  /* the character itself is not needed, only its frequency */

		fre[i - 1] = f;
		H->Data[i]->Weight = f;
		H->Data[i]->Left = NULL;
		H->Data[i]->Right = NULL;
		H->Size++;
	}
	return H;
}

/* IsEmpty - return 1 when the heap holds no elements, 0 otherwise. */
int IsEmpty(MinHeap H) {
	if (H->Size != 0) {
		return 0;
	}
	return 1;
}

/*
 * DeleteMin - remove and return the node with the smallest Weight (the root).
 *
 * Prints "EMPTY\n" and returns NULL when the heap has no elements. Otherwise
 * the last element is taken out of the array and sifted down from the root
 * to restore the heap order.
 */
HuffmanTree DeleteMin(MinHeap H) {
	if (IsEmpty(H)) {
		printf("EMPTY\n");
		return NULL;
	}

	HuffmanTree smallest = H->Data[1];
	HuffmanTree last = H->Data[H->Size];
	H->Size -= 1;

	/* Move a hole down from the root until `last` fits into it. */
	int hole = 1;
	while (hole * 2 <= H->Size) {
		int child = hole * 2;
		/* Prefer the right child when it exists and is lighter. */
		if (child < H->Size && H->Data[child + 1]->Weight < H->Data[child]->Weight) {
			child += 1;
		}
		if (last->Weight <= H->Data[child]->Weight) {
			break;  /* `last` is no heavier than both children */
		}
		H->Data[hole] = H->Data[child];
		hole = child;
	}
	H->Data[hole] = last;

	return smallest;
}


/*
 * PercDown - sift down: turn the sub-heap rooted at H->Data[p] into a
 * min-heap, assuming both of its child sub-heaps already are min-heaps.
 */
void PercDown(MinHeap H, int p)
{
	HuffmanTree moving = H->Data[p];  /* element being placed */
	int hole = p;

	while (hole * 2 <= H->Size) {
		int child = hole * 2;
		/* Pick the lighter of the two children (the right one may not exist). */
		if (child < H->Size && H->Data[child + 1]->Weight < H->Data[child]->Weight) {
			child += 1;
		}
		if (moving->Weight <= H->Data[child]->Weight) {
			break;  /* found the right position */
		}
		/* Lift the lighter child into the hole and continue below it. */
		H->Data[hole] = H->Data[child];
		hole = child;
	}

	H->Data[hole] = moving;
}

/*
 * BuildMinHeap - rearrange H->Data[1..Size] so that it satisfies the
 * min-heap order, by sifting down every node that has at least one child.
 */
void BuildMinHeap(MinHeap H)
{
	/* Start at the parent of the last element and work back to the root. */
	int node = H->Size / 2;
	while (node >= 1) {
		PercDown(H, node);
		node--;
	}
}

Use the min-heap to build your own Huffman tree

  1. Take the two smallest nodes out of the min-heap
  2. Create a new node with these two nodes as its children; the weight of the new node is the sum of the weights of its two children
  3. Insert the new node back into the min-heap so it is ordered for the next round
/*
 * Huffman - build a Huffman tree from the nodes stored in H.
 *
 * H is first heap-ordered, then the two lightest nodes are repeatedly merged
 * under a new parent whose Weight is their sum, and the parent is pushed back
 * into the heap. With Size initial nodes this takes Size - 1 merges.
 *
 * Returns the root of the tree, i.e. the last node left in the heap.
 */
HuffmanTree Huffman(MinHeap H) {
	BuildMinHeap(H);

	int merges = H->Size - 1;
	while (merges-- > 0) {
		HuffmanTree parent = (HuffmanTree)malloc(sizeof(struct TreeNode));
		parent->Left = DeleteMin(H);
		parent->Right = DeleteMin(H);
		parent->Weight = parent->Left->Weight + parent->Right->Weight;
		Insert(H, parent);
	}

	return DeleteMin(H);
}

Calculate the WPL of the Huffman tree you built

/*
 * WPL - weighted path length of the tree rooted at T, where T itself sits at
 * depth Depth: the sum over all leaves of (leaf depth * leaf Weight).
 *
 * A node that is not a leaf is expected to have both children.
 */
int WPL(HuffmanTree T, int Depth) {
	int is_leaf = (T->Left == NULL && T->Right == NULL);
	if (is_leaf) {
		return T->Weight * Depth;
	}

	int left_sum = WPL(T->Left, Depth + 1);
	int right_sum = WPL(T->Right, Depth + 1);
	return left_sum + right_sum;
}

Building and deleting the tree for the codes submitted by each student

  1. Build a tree from the student's code for each character, and delete the tree after each student's submission has been processed
  2. While building the tree, check whether the codes form a prefix code
/*
 * BulidTree - add one code word to the code tree rooted at *T.
 *
 * Each '0' in s moves to (creating if necessary) the left child, each '1' to
 * the right child; any other character is ignored. Nodes that terminate a
 * code word carry Weight 1, all other nodes carry Weight 0.
 *
 * Returns 0 when the word was added, 1 when it breaks the prefix property:
 *   - the path runs into a node that already terminates a code word, or
 *   - the word ends on a node that already has children.
 */
int BulidTree(HuffmanTree* T, char* s) {
	HuffmanTree cur = *T;

	for (const char* bit = s; *bit != '\0'; bit++) {
		/* Select the child link this bit follows. */
		HuffmanTree* link;
		if (*bit == '0') {
			link = &cur->Left;
		} else if (*bit == '1') {
			link = &cur->Right;
		} else {
			continue;
		}

		if (*link == NULL) {
			/* First time this path is used: create an inner node. */
			HuffmanTree fresh = (HuffmanTree)malloc(sizeof(struct TreeNode));
			fresh->Weight = 0;
			fresh->Left = NULL;
			fresh->Right = NULL;
			*link = fresh;
		} else if ((*link)->Weight == 1) {
			/* An existing code word ends here, so it would be a prefix
			 * of (or equal to) this one. */
			return 1;
		}
		cur = *link;
	}

	/* The word must end on a leaf; otherwise it is a prefix of another word. */
	if (cur->Left != NULL || cur->Right != NULL) {
		return 1;
	}
	cur->Weight = 1;
	return 0;
}

/*
 * DeleteTree - free every node of the tree rooted at T.
 *
 * Both subtrees are released before the node itself (post-order), so no node
 * is read after it has been freed. A NULL tree is accepted and ignored.
 */
void DeleteTree(HuffmanTree T) {
	if (T == NULL) {
		return;
	}
	DeleteTree(T->Left);
	DeleteTree(T->Right);
	free(T);
}

Check student input

  1. First read the code of each character of the student
  2. Build a tree to determine whether it is a prefix code
  3. Determine whether it is an optimal code (compare the WPL of the Huffman tree built above with the total encoded length of the student's codes)
/*
 * Reads N and N "character frequency" pairs, builds a Huffman tree to obtain
 * the optimal total code length, then reads M submissions of N
 * "character code" lines each and prints "Yes" or "No" per submission.
 *
 * A submission is accepted when every code has at most N - 1 bits, the codes
 * form a prefix code, and the sum of (code length * frequency) equals the
 * WPL of the Huffman tree.
 *
 * Returns 0 on success, 1 when the header cannot be read or memory runs out.
 */
int main() {
	int N;
	if (scanf("%d", &N) != 1 || N < 1) {
		return 1;
	}
	getchar();  /* consume the separator after N so ReadData starts on data */

	/* A valid code has at most N - 1 bits, so N characters are enough to
	 * recognise an over-long one; one more byte holds the terminator. */
	int* fre = (int*)malloc(sizeof(int) * N);
	char* s = (char*)malloc((size_t)N + 1);
	MinHeap H = CreateHeap(N);
	if (fre == NULL || s == NULL || H == NULL) {
		free(fre);
		free(s);
		return 1;
	}

	H = ReadData(H, N, fre);  /* only reads the data; Huffman() orders it */
	HuffmanTree Root = Huffman(H);
	if (Root == NULL) {
		free(fre);
		free(s);
		return 1;
	}
	int CodeLen = WPL(Root, 0);

	/* From here on: validation of the students' submissions. */
	int M;
	if (scanf("%d", &M) != 1) {
		M = 0;
	}

	/* Bound the %s conversion to the buffer: " %c %<N>s". The leading blank
	 * skips the line break left over from the previous line. */
	char fmt[32];
	snprintf(fmt, sizeof fmt, " %%c %%%ds", N);

	for (int i = 0; i < M; i++) {
		int is_huff = 1;  /* 1 means the submission is still acceptable */
		int Len = 0;

		/* In a code tree, nodes ending a code word have Weight 1, others 0. */
		HuffmanTree CodeTree = (HuffmanTree)malloc(sizeof(struct TreeNode));
		if (CodeTree == NULL) {
			break;
		}
		CodeTree->Weight = 0;
		CodeTree->Left = NULL;
		CodeTree->Right = NULL;

		for (int j = 0; j < N; j++) {
			char aa;
			if (scanf(fmt, &aa, s) != 2) {
				is_huff = 0;
				continue;
			}
			/* Drop the rest of the line, including whatever is left of an
			 * over-long code, so the next line is parsed from its start. */
			int ch;
			while ((ch = getchar()) != '\n' && ch != EOF) {
			}

			/* Keep reading every line, but stop checking after a failure. */
			if (is_huff != 0) {
				int num = (int)strlen(s);
				if (num > N - 1) {
					/* A Huffman code over N symbols has at most N - 1 bits. */
					is_huff = 0;
				}
				Len += num * fre[j];  /* code length times frequency */
				if (BulidTree(&CodeTree, s)) {
					/* BulidTree returns 1 when the prefix property breaks. */
					is_huff = 0;
				}
			}
		}

		if (Len != CodeLen) {
			is_huff = 0;
		}
		if (is_huff)
			printf("Yes\n");
		else
			printf("No\n");
		DeleteTree(CodeTree);
	}

	/* The leaves of the Huffman tree are the nodes CreateHeap allocated for
	 * slots 1..N; only the sentinel in slot 0 is outside the tree. */
	DeleteTree(Root);
	free(H->Data[0]);
	free(H->Data);
	free(H);
	free(s);
	free(fre);
	return 0;
}

Guess you like

Origin blog.csdn.net/qq_43779658/article/details/105590739