Huffman tree construction and coding

Table of contents

1. What is a Huffman tree?

2. Construction process of Huffman tree

1. Process analysis

2. Process code implementation

Select function

3. Implementation of Huffman coding

Complete code

Summary


1. What is a Huffman tree?

A Huffman tree, also known as an optimal binary tree, is the binary tree with the smallest weighted path length (WPL) among all binary trees built on the same set of weighted leaves, and it is widely used in practical applications. The definition of a Huffman tree involves concepts such as path, path length, and weight.


2. Construction process of Huffman tree

1. Process analysis

1. From the forest, pick the two trees whose root weights are the smallest and make them the left and right subtrees of a new binary tree; the weight of the new root is the sum of the two weights. Trees that have already been picked are never picked again.

2. Put the newly constructed binary tree back into the forest together with the remaining trees, and repeat step 1.

3. Continue until everything has been merged into a single binary tree; that tree is the Huffman tree.

2. Process code implementation

        The first step is to allocate the array that stores the binary tree to be constructed. A Huffman tree with n leaves has 2n-1 nodes; because unit 0 is left unused (the value 0 means "no parent" or "no child"), 2n units are dynamically allocated, and a loop then runs 2n-1 times to set the parent and child fields of every used unit to 0.

        The second step is to read the weights of the n trees in the forest and store them in units 1 to n of the array.

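        The third step is to build the Huffman tree: for each of the remaining units n+1 to 2n-1, select the two parentless nodes with the smallest weights, make them the left and right children of the new node, and store the sum of their weights in the new node.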

The code is shown below:

/*
 * CreateHaffmanTree - read n weights from stdin and build the Huffman tree.
 *
 * ht : out - receives a malloc'ed array of 2n nodes; the nodes live at
 *      indices 1..2n-1 (leaves at 1..n, merged nodes at n+1..2n-1) and
 *      index 0 is a zeroed placeholder, since 0 means "no parent/child".
 *      Set to NULL when n < 1, when allocation fails, or when a weight
 *      cannot be read. Any previous value of ht is overwritten, not freed.
 *      The caller owns the array and releases it with free().
 * n  : number of leaves (weights) to read; must be at least 1.
 *
 * On success the finished node table is printed through visit().
 */
void CreateHaffmanTree(HaffmanTree &ht,int n)
{
	ht = NULL; // a defined value on every early exit
	if (n < 1) return ;
	int m = 2 * n - 1; // total number of nodes in a tree with n leaves
	ht = (HaffmanTree)malloc(sizeof(htNode)*(m+1));
	if (ht == NULL) return ;
	for (int i = 0; i <= m; i++) // clear every unit, including the unused slot 0
	{
		ht[i].weight = 0;
		ht[i].parent = 0;
		ht[i].lchild = 0;
		ht[i].rchild = 0;
	}
	for (int i = 1; i <= n; i++) // store the leaf weights
	{
		int x = 0;
		if (scanf("%d", &x) != 1) // missing or malformed input
		{
			free(ht);
			ht = NULL;
			return ;
		}
		ht[i].weight = x;
	}
	// build the tree: every pass merges the two lightest parentless nodes
	for (int i = n+1; i <= m; i++)
	{
		int l=0, r=0;
		Select(ht, i-1, l,  r); // indices of the two smallest roots among 1..i-1
		if (l == 0 || r == 0) // selection failed; do not link through slot 0
		{
			free(ht);
			ht = NULL;
			return ;
		}
		ht[i].weight = ht[l].weight + ht[r].weight;
		ht[i].lchild = l; // the selected nodes become the children of node i
		ht[i].rchild = r;
		ht[l].parent = i; // and node i becomes their parent
		ht[r].parent = i;
	}
	visit(ht,m);
}

Select function

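This function finds, among the first n nodes, the two nodes that have no parent yet and the smallest weights, and returns their subscripts through the reference parameters l (smallest) and r (second smallest).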

The code is shown below:

/*
 * Select - find the two parentless nodes with the smallest weights.
 *
 * ht : node array; indices 1..n are examined
 * n  : highest index to examine
 * l  : out - index of the lightest node whose parent field is 0
 * r  : out - index of the second-lightest such node
 *
 * An output is 0 when no suitable node exists. Among equal weights the
 * lower index is chosen first. Candidates are compared by index instead
 * of against fixed sentinel values, so weights of any size are handled.
 */
void Select(HaffmanTree ht, int n, int &l, int &r)
{
	l = 0;
	r = 0;
	for (int i = 1; i <= n; i++)
	{
		if (ht[i].parent != 0)
		{
			continue; // already merged into a larger tree
		}
		if (l == 0 || ht[i].weight < ht[l].weight)
		{
			r = l; // the previous minimum becomes the runner-up
			l = i;
		}
		else if (r == 0 || ht[i].weight < ht[r].weight)
		{
			r = i;
		}
	}
}

3. Implementation of Huffman coding

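        After constructing the Huffman tree, the main idea of finding the Huffman codes is to take each leaf node as the starting point and trace upward to the root node. While backtracking, a branch to a left child is recorded as 0 and a branch to a right child is recorded as 1. Because the code is collected from the leaf towards the root, it is written into the buffer from back to front.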

        Since each Huffman code is a variable-length code, an array of character pointers is used, in which each pointer stores the starting address of one code string.

The code is shown below:

/*
 * CreatHaffmanCode - derive the code string of each of the n leaves.
 *
 * ht : node array with the leaves at indices 1..n
 * hc : out - receives a malloc'ed array of n+1 char pointers; hc[i] for
 *      i in 1..n is a malloc'ed NUL-terminated string of '0'/'1' and
 *      hc[0] is NULL. Set to NULL when ht is NULL, n < 1, or an
 *      allocation fails. The caller frees each hc[i] and then hc.
 * n  : number of leaves
 *
 * On success the codes are printed through visitCode().
 */
void CreatHaffmanCode(HaffmanTree ht, HaffmanCode& hc, int n)
{
	hc = NULL;
	if (ht == NULL || n < 1) return;

	hc = (HaffmanCode)malloc(sizeof(char*) * (n + 1)); // slot 0 stays unused so codes share the node indices
	char *cd = (char*)malloc(sizeof(char) * n);  // scratch buffer, filled from the back
	if (hc == NULL || cd == NULL)
	{
		free(cd);
		free(hc);
		hc = NULL;
		return;
	}
	hc[0] = NULL;
	cd[n - 1] = '\0';  // string terminator
	for (int i = 1; i <= n; i++)
	{
		int start = n - 1; // the walk goes leaf -> root, so the code is stored back to front
		int c = i; // current node
		int f = ht[i].parent; // parent of the current node
		while (f != 0 && start > 0) // start > 0 keeps a malformed parent chain inside cd
		{
			start--;
			if (ht[f].lchild == c) // left branch is '0', anything else is '1'
			{
				cd[start] = '0';
			}
			else
			{
				cd[start] = '1';
			}
			c = f;
			f = ht[f].parent; // move one level up
		}
		hc[i] = (char*)malloc((n - start) * sizeof(char)); // code length plus terminator
		if (hc[i] == NULL)
		{
			for (int k = 1; k < i; k++) // undo the codes stored so far
			{
				free(hc[k]);
			}
			free(hc);
			free(cd);
			hc = NULL;
			return;
		}
		strcpy(hc[i], &cd[start]); // copy the finished code into hc
	}
	free(cd); // release the scratch buffer
	visitCode(ht, hc, n); // print the codes
}

Complete code

#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <malloc.h>
#include <string.h>
/*
 * One node of the Huffman tree. Nodes are kept in an array that is indexed
 * from 1; the links below are indices into that array, and 0 stands for
 * "no such node".
 */
typedef struct htNode htNode;
struct htNode
{
	int weight;          /* weight of the subtree rooted at this node */
	int parent;          /* index of the parent node, 0 if none */
	int lchild;          /* index of the left child, 0 if none */
	int rchild;          /* index of the right child, 0 if none */
};

/* Pointer to the first element of the node array. */
typedef htNode *HaffmanTree;

/* Array of code strings, one char pointer per leaf. */
typedef char **HaffmanCode;

/*
 * Select - find the two parentless nodes with the smallest weights.
 *
 * ht : node array; indices 1..n are examined
 * n  : highest index to examine
 * l  : out - index of the lightest node whose parent field is 0
 * r  : out - index of the second-lightest such node
 *
 * An output is 0 when no suitable node exists. Among equal weights the
 * lower index is chosen first. Candidates are compared by index instead
 * of against fixed sentinel values, so weights of any size are handled.
 */
void Select(HaffmanTree ht, int n, int &l, int &r)
{
	l = 0;
	r = 0;
	for (int i = 1; i <= n; i++)
	{
		if (ht[i].parent != 0)
		{
			continue; // already merged into a larger tree
		}
		if (l == 0 || ht[i].weight < ht[l].weight)
		{
			r = l; // the previous minimum becomes the runner-up
			l = i;
		}
		else if (r == 0 || ht[i].weight < ht[r].weight)
		{
			r = i;
		}
	}
}

/*
 * visit - print nodes 1..m of the tree, one row per node, with the
 * weight, parent, left child and right child in left-aligned columns
 * of width 12.
 */
void visit(HaffmanTree ht, int m)
{
	int idx = 1;
	while (idx <= m)
	{
		const htNode *node = &ht[idx];
		printf("%-12d%-12d%-12d%-12d\n", node->weight, node->parent, node->lchild, node->rchild);
		++idx;
	}
}

/*
 * CreateHaffmanTree - read n weights from stdin and build the Huffman tree.
 *
 * ht : out - receives a malloc'ed array of 2n nodes; the nodes live at
 *      indices 1..2n-1 (leaves at 1..n, merged nodes at n+1..2n-1) and
 *      index 0 is a zeroed placeholder, since 0 means "no parent/child".
 *      Set to NULL when n < 1, when allocation fails, or when a weight
 *      cannot be read. Any previous value of ht is overwritten, not freed.
 *      The caller owns the array and releases it with free().
 * n  : number of leaves (weights) to read; must be at least 1.
 *
 * On success a header line and the finished node table are printed.
 */
void CreateHaffmanTree(HaffmanTree &ht,int n)
{
	ht = NULL; // a defined value on every early exit
	if (n < 1) return ;
	int m = 2 * n - 1; // total number of nodes in a tree with n leaves
	ht = (HaffmanTree)malloc(sizeof(htNode)*(m+1));
	if (ht == NULL) return ;
	for (int i = 0; i <= m; i++) // clear every unit, including the unused slot 0
	{
		ht[i].weight = 0;
		ht[i].parent = 0;
		ht[i].lchild = 0;
		ht[i].rchild = 0;
	}
	for (int i = 1; i <= n; i++) // store the leaf weights
	{
		int x = 0;
		if (scanf("%d", &x) != 1) // missing or malformed input
		{
			free(ht);
			ht = NULL;
			return ;
		}
		ht[i].weight = x;
	}
	// build the tree: every pass merges the two lightest parentless nodes
	for (int i = n+1; i <= m; i++)
	{
		int l=0, r=0;
		Select(ht, i-1, l,  r); // indices of the two smallest roots among 1..i-1
		if (l == 0 || r == 0) // selection failed; do not link through slot 0
		{
			free(ht);
			ht = NULL;
			return ;
		}
		ht[i].weight = ht[l].weight + ht[r].weight;
		ht[i].lchild = l; // the selected nodes become the children of node i
		ht[i].rchild = r;
		ht[l].parent = i; // and node i becomes their parent
		ht[r].parent = i;
	}
	printf("初始权值    双亲结点    左孩子    右孩子\n");
	visit(ht,m);
}


/*
 * visitCode - print the weight and the code string of leaves 1..n.
 *
 * ht : node array; ht[i].weight is printed for i in 1..n
 * hc : code strings; hc[i] is printed for i in 1..n
 * n  : number of leaves
 *
 * Nothing is printed when ht or hc is NULL.
 */
void visitCode(HaffmanTree ht, char**hc,int n)
{
	if (ht == NULL || hc == NULL)
	{
		return;
	}

	printf("--------------------哈夫曼树的编码-------------------\n");
	printf("初始权值  哈夫曼编码\n");
	for (int i = 1; i <= n; i++)
	{
		// the format string used to end in a stray '%', which is an
		// invalid conversion specification
		printf("%-12d%-12s\n", ht[i].weight, hc[i]);
	}
}


/*
 * CreatHaffmanCode - derive the code string of each of the n leaves.
 *
 * ht : node array with the leaves at indices 1..n
 * hc : out - receives a malloc'ed array of n+1 char pointers; hc[i] for
 *      i in 1..n is a malloc'ed NUL-terminated string of '0'/'1' and
 *      hc[0] is NULL. Set to NULL when ht is NULL, n < 1, or an
 *      allocation fails. The caller frees each hc[i] and then hc.
 * n  : number of leaves
 *
 * On success the codes are printed through visitCode().
 */
void CreatHaffmanCode(HaffmanTree ht, HaffmanCode& hc, int n)
{
	hc = NULL;
	if (ht == NULL || n < 1) return;

	hc = (HaffmanCode)malloc(sizeof(char*) * (n + 1)); // slot 0 stays unused so codes share the node indices
	char *cd = (char*)malloc(sizeof(char) * n);  // scratch buffer, filled from the back
	if (hc == NULL || cd == NULL)
	{
		free(cd);
		free(hc);
		hc = NULL;
		return;
	}
	hc[0] = NULL;
	cd[n - 1] = '\0';  // string terminator
	for (int i = 1; i <= n; i++)
	{
		int start = n - 1; // the walk goes leaf -> root, so the code is stored back to front
		int c = i; // current node
		int f = ht[i].parent; // parent of the current node
		while (f != 0 && start > 0) // start > 0 keeps a malformed parent chain inside cd
		{
			start--;
			if (ht[f].lchild == c) // left branch is '0', anything else is '1'
			{
				cd[start] = '0';
			}
			else
			{
				cd[start] = '1';
			}
			c = f;
			f = ht[f].parent; // move one level up
		}
		hc[i] = (char*)malloc((n - start) * sizeof(char)); // code length plus terminator
		if (hc[i] == NULL)
		{
			for (int k = 1; k < i; k++) // undo the codes stored so far
			{
				free(hc[k]);
			}
			free(hc);
			free(cd);
			hc = NULL;
			return;
		}
		strcpy(hc[i], &cd[start]); // copy the finished code into hc
	}
	free(cd); // release the scratch buffer
	visitCode(ht, hc, n); // print the codes
}

/*
 * Entry point: reads the number of weights and the weights themselves
 * from stdin, builds the Huffman tree, prints the node table and the code
 * of every leaf, then releases all allocated memory.
 *
 * Returns 0 on success and 1 when the input is invalid or the tree could
 * not be built.
 */
int main()
{
	HaffmanTree ht = NULL; // stays NULL if CreateHaffmanTree gives up early
	HaffmanCode hc = NULL;
	int n = 0;
	printf("请输入n个数的权值: ");
	if (scanf("%d", &n) != 1 || n < 1)
	{
		fprintf(stderr, "输入无效: n 必须是正整数\n");
		return 1;
	}
	printf("--------------------哈夫曼树的构造-------------------\n");
	printf("请输入每个数的权值:");
	CreateHaffmanTree(ht, n);
	if (ht == NULL) // no tree was produced, so there is nothing to encode
	{
		fprintf(stderr, "哈夫曼树构造失败\n");
		return 1;
	}
	printf("\n");
	CreatHaffmanCode(ht, hc, n);
	if (hc != NULL)
	{
		for (int i = 1; i <= n; i++) // codes are stored at indices 1..n
		{
			free(hc[i]);
		}
		free(hc);
	}
	free(ht);
	return 0;
}


Summary

The above is the implementation of Huffman tree construction and encoding. Several problems came up along the way. For example, at first I did not realize that the two smallest weights should be told apart by their subscripts (indices) rather than by their values. I learned a lot during the whole process.

Guess you like

Origin blog.csdn.net/x2656271356/article/details/127489968