Huffman tree establishment, coding and decoding

Huffman tree establishment, coding and decoding

1. Demand analysis
Design task: Set the character set to 26 English letters, and their frequency of appearance is shown in the table below.
Insert picture description here

Programming is implemented
(1) first build a Huffman tree,
(2) use this tree to encode and decode the message "this program is my favorite".
(3) Input and output form: input characters and weights, create a Huffman tree, output its corresponding weights, weight, parent, lchild, rchild, and then input the message this program is my favorite, after the output encoding is completed Enter this string of binary numbers into the decoding part, and the decoding is completed to get this program is my favorite.
(4) Program function: Create a Huffman tree and encode and decode it.
(5) Test case:
/*
27
186
a 64
b 13
c 22
d 32
e 103
f 21
g 15
h 47
i 57
j 1
k 5
l 32
m 20
n 57
o 63
p 15
q 1
r 48
s 51
t 80
u 23
v 8
w 8
x 1
y 6
z 1
27
this program is my favorite
1101000101110110111100101001010101001000010101111001111101110110111110011001111011100000101100111111010001001111101010

*/
Note : The layout is messy when the program is run with Visual Stdio, and it can run normally with VC++ .

The complete code is as follows:

/*用VC++运行,不用Visual Studio*/
#include<malloc.h>
#include<string.h>
#include<stdio.h>

typedef struct {
    
    
	unsigned int weight;
	unsigned int parent, lchild, rchild;
}HTNode, *HuffmanTree;    //动态分配数组存储哈夫曼树
typedef char **HuffmanCode;    //动态分配数组存储哈夫曼编码表

void Select(HuffmanTree HT, int i, int *s1, int *s2) {
    
    
	//在HT[0..i-1]找出parent为0且weight最小的两个结点,其序号分别为s1和s2
	int min1, min2, k, j = 0;//k控制HT数组,j保证比较进行下去
	for (k = 0; k <= i - 1; ++k) {
    
    
		if (HT[k].parent == 0) {
    
    
			if (j == 0) {
    
    
				min1 = HT[k].weight;  *s1 = k;
			}
			else {
    
    //始终能保证min1是最小的,min2第二小
				if (HT[k].weight < min1) {
    
    
					min2 = min1;  *s2 = *s1;
					min1 = HT[k].weight;  *s1 = k;
				}
				else if (HT[k].weight >= min1 && HT[k].weight < min2) {
    
    
					min2 = HT[k].weight;  *s2 = k;
				}
			}
			++j;
		}
	}
}
void CreateHuffmanTree(HuffmanTree&HT, int *w, int n) {
    
    
	if (n <= 1)return;
	int m = 2 * n - 1;    //赫夫曼树有m个结点,m控制数组开多大,叶子节点有n个,哈夫曼总结点数为2n-1个
	HT = (HuffmanTree)malloc(m * sizeof(HTNode));
	HuffmanTree p = HT;
	int i;
	for (i = 0; i < n; ++i, ++p, ++w) {
    
    //给定的权值输入进去
		p->weight = *w;
		p->parent = 0; p->lchild = 0; p->rchild = 0;
	}
	for (; i < m; ++i, ++p) {
    
    //其余全赋为0,此时i接着上一个for循环下来的i继续
		p->weight = 0;
		p->parent = 0; p->lchild = 0; p->rchild = 0;
	}

	for (i = n; i < m; ++i) {
    
        //建哈夫曼树 
		//在HT[0..i-1]选择parent为0且weight最小的两个结点,其序号分别为s1和s2
		int s1, s2;
		Select(HT, i, &s1, &s2);
		HT[s1].parent = i;  HT[s2].parent = i;
		HT[i].lchild = s1;  HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;//从下往上建立,导致头节点是HT[2n-2]
	}
}
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, int *w, int n) {
    
    
	//w存放n个字符的权值(均>0),构造哈夫曼树HT,并求出n个字符的哈夫曼编码HC
	int i;
	//---从叶子到根逆向求每个字符的哈夫曼编码---
	HC = (HuffmanCode)malloc(n * sizeof(char *));
	char *cd = (char*)malloc(n * sizeof(char));
	cd[n - 1] = '\0';
	int start, f, c;
	for (i = 0; i < n; ++i) {
    
    
		start = n - 1;
		for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
    
    
			if (HT[f].lchild == c) cd[--start] = '0';
			else cd[--start] = '1';
		}
		HC[i] = (char*)malloc((n - start) * sizeof(char));
		strcpy(HC[i], &cd[start]);
	}
	free(cd);
}

int main() {
    
    
	int n, i;
	printf("请输入需编码的字符数目n:\n");
	scanf("%d", &n); fflush(stdin);//清空缓冲区,不清空缓冲区的话上一个字符还在缓冲区内,就产生错误了
	char *c = (char*)malloc(n * sizeof(char));    //存字符 
	int *w = (int*)malloc(n * sizeof(int));    //存权重 
	printf("请输入字符和对应的权值:\n");
	for (i = 0; i < n; i++) {
    
    
		scanf("%c %d", &c[i], &w[i]);
		fflush(stdin);
	}

	HuffmanTree HT;
	HuffmanCode HC;
	CreateHuffmanTree(HT, w, n);
	printf("\n哈夫曼树创建成功\n");
	HuffmanCoding(HT, HC, w, n);
	//输出哈夫曼树
	printf("哈夫曼树为:\n");
	printf("char weight parent lchild rchild\n");
    for (i = 0; i < n; ++i) {
    
    
		printf("%4c %5d %6d %6d %6d\n", c[i], w[i], HT[i].parent, HT[i].lchild, HT[i].rchild);
	}
	//各字符编码
	printf("\n各字符对应的编码为:\n");
	for (i = 0; i < n; ++i) {
    
    
		printf("%c %s\n", c[i], HC[i]);
	}

	//报文编码
	int n1;
	printf("\n");
	printf("请输入报文的字符总数(一个空格也算一个字符):\n");
	scanf("%d", &n1);
	printf("\n");
	printf("请输入报文:\n");
	char *cc = (char*)malloc(n1 * sizeof(char));
	int m;
	for (m = 0; m <= n1; m++) {
    
    
		scanf("%c", &cc[m]);
	}
	printf("编码为:\n");
	for (m = 0; m <= n1; m++) {
    
    
		for (int j = 0; j < 27; j++) {
    
    
			if (cc[m] == c[j]) {
    
    
				printf("%s", HC[j]);
			}
		}
	}
	printf("\n");

	//报文译码
	char str[1000];
	printf("\n请输入要译码的二进制:\n");
	scanf("%s", &str);
	char *s = str;
	int p;
	printf("译码为:\n");
	while (*s != '\0') {
    
    
		p = 2 * n - 2;//数组从0开始的,一共2n-1个节点,所以头节点在数组中编号为2n-2
		while (HT[p].lchild != 0 || HT[p].rchild != 0) {
    
    //有孩子的往下走
			if (*s == '0') p = HT[p].lchild;   //左走,左0右1
			else p = HT[p].rchild;  //右走
			++s;
		}
		printf("%c", c[p]);
	}
	printf("\n");
	printf("\n");

	free(c);
	free(w);
	return 0;
}
/*
27
  186
a 64
b 13
c 22
d 32
e 103
f 21
g 15
h 47
i 57
j 1
k 5
l 32
m 20
n 57
o 63
p 15
q 1
r 48
s 51
t 80
u 23
v 8
w 8
x 1
y 6
z 1
27
this program is my favorite
1101000101110110111100101001010101001000010101111001111101110110111110011001111011100000101100111111010001001111101010

*/

Operation result: For
Insert picture description here
Insert picture description here
Insert picture description here
reference,
think about the Huffman encoding and decoding of data structure (C implementation)

Guess you like

Origin blog.csdn.net/weixin_46020391/article/details/112006630