算法树9 Huffman Codes

全部每周作业和视频思考题答案和解析见浙江大学数据结构思考题+每周练习答案汇总

题目：In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No

解答：一看到这个看着就有点麻烦的题目我也是很郁闷的，何况课程里老师也说这个题比较繁杂。

我第一次尝试使用链的形式来创建树，即左节点右节点这种，但是在做的时候我发现了一些问题，即归并的问题。如果要归并则需要将所有还没有归并的根统计到一起：

#include <iostream>
#include <map>
#include <algorithm>
#include <vector>
using namespace std;
// Every input symbol as a (frequency, character) pair; the frequency comes
// first so that the default pair ordering sorts by frequency.
std::vector<std::pair<int, char>> myLetters;

// Node of the pointer-linked tree used by the first attempt.
struct HaffmanNode {
	// Payload stored as an (int, char) pair.
	std::pair<int, char> Node;
	// Child links; createHaffmanNode() sets both to null for a fresh node.
	HaffmanNode *Left, *Right;
};
// Allocates a new tree node whose two child pointers are cleared.
HaffmanNode* createHaffmanNode();
// Allocates a new parent node that has A as left child and B as right child.
HaffmanNode* generateUnion(HaffmanNode *A, HaffmanNode *B);
// Reads N "character frequency" pairs from standard input into myLetters.
void getLetters(int N);

// Entry point of the first (abandoned) attempt: reads the symbol count N from
// standard input and passes it to getLetters().
int main(void) {
	int N;
	cin >> N;

	getLetters(N);

#ifdef _WIN32
	// "pause" is a Windows shell builtin; on any other platform the call would
	// only spawn a failing shell command, so it is compiled in on Windows only.
	system("pause");
#endif
	return 0;
}

// Reads N "character frequency" pairs from standard input, appends each one to
// the global myLetters list as a (frequency, character) pair, sorts the list
// in ascending order and prints its first N entries.
void getLetters(int N) {
	int remaining = N;
	while (remaining-- > 0) {
		char symbol = '\0';
		int frequency = 0;
		std::cin >> symbol >> frequency;
		myLetters.emplace_back(frequency, symbol);
	}

	std::sort(myLetters.begin(), myLetters.end());

	// Dump the sorted entries: "key" is the frequency, "value" the character.
	for (int idx = 0; idx < N; ++idx) {
		const std::pair<int, char>& entry = myLetters[idx];
		std::cout << "key:" << entry.first << " value:" << entry.second << std::endl;
	}
}

// Allocates one HaffmanNode with malloc and clears both child pointers.
// The Node payload is left for the caller to fill in.
HaffmanNode* createHaffmanNode() {
	HaffmanNode* fresh = static_cast<HaffmanNode*>(malloc(sizeof(HaffmanNode)));
	fresh->Right = nullptr;
	fresh->Left = nullptr;
	return fresh;
}
// Merges two subtrees: allocates a new parent node with A as its left child
// and B as its right child, and returns it.
//
// Node is a pair<int, char>, so the int member (.first) receives the combined
// value of the two children and the char member (.second) receives the '*'
// marker that identifies a merged (non-leaf) node. The previous code had the
// two members swapped: it stored '*' in the int and the sum of the two
// character codes in the char.
HaffmanNode* generateUnion(HaffmanNode *A, HaffmanNode *B) {
	HaffmanNode* TreeNode = (HaffmanNode*)malloc(sizeof(struct HaffmanNode));
	TreeNode->Left = A;
	TreeNode->Right = B;
	TreeNode->Node.first = A->Node.first + B->Node.first;
	TreeNode->Node.second = '*';
	return TreeNode;
}

generateUnion就是用来进行归并的，然后生成一个新的节点，让其代表字符为星号，表示是一个根。但是这样还得把根进行存储，因为再找的两个最小值可能不包含当前根。太麻烦了，舍弃这种方案。

所以还是用数组建树做吧。

程序比较长，我们先把最终结果给呈现上，然后再分析程序是如何进行构建的。

#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// Frequency of every input character, keyed by the character itself.
std::map<char, int> myLetters;
// One node of the array-based tree stored in myTree. Links are indices into
// that array; -1 means "no such node".
struct HaffmanNode {
	char Letter;              // symbol of a leaf, '*' for a merged node
	int Count;                // frequency (sum of both children for a merged node)
	int Parent, Left, Right;  // indices of the parent and the two children
};
// Number of nodes currently in myTree: set to N in main() and incremented by
// findMin() each time it appends a merged node.
int Nums;
// Array-based tree; the input leaves come first, merged nodes are appended.
vector<HaffmanNode> myTree;
// Returns a node whose Parent/Left/Right links are all -1.
HaffmanNode createNode();
// Reads N "character frequency" pairs into myLetters and myTree.
void getLetters(int N);
// Repeatedly calls findMin() until no further merge is possible.
void buildTree(int N);
// Merges the two parentless nodes with the smallest Count; returns 1 after a
// merge and 0 when fewer than two parentless nodes remain.
int findMin();
// Prints index, Parent, Count and Letter of every node in myTree.
void PrintTree(void);
// Returns the sum of Count * depth over all leaves of the built tree.
int getLength();
// Pointer-linked trie node used to check a submission (defined further below).
struct HaffmanList;
// Reads one submission of N codes and prints "Yes" or "No" for it.
void generateTree(int N,int length);
int main(void) {
	int N;
	cin >> N;
	Nums = N;
	getLetters(N);
	buildTree(Nums);
	int Length = getLength();
	int M;
	cin >> M;
	for (int i = 0;i < M;i++) {
		generateTree(N,Length);
	}
	//开始检测别人构建的哈夫曼树

	system("pause");
	return 0;
}

void getLetters(int N) {
	for (int i = 0;i < N;i++) {	
		HaffmanNode myNode = createNode();
		cin >> myNode.Letter >> myNode.Count;
		pair<char, int>myletter;
		myletter.first = myNode.Letter;
		myletter.second = myNode.Count;
		myLetters.insert(myletter);
		myTree.push_back(myNode);
	}
}
// Returns a detached node: Parent, Left and Right are -1 ("none").
// Letter and Count are now given defined values ('\0' and 0) as well, so the
// returned object never carries indeterminate members; callers overwrite them
// with the real symbol and frequency.
HaffmanNode createNode(){
	HaffmanNode newNode;
	newNode.Letter = '\0';
	newNode.Count = 0;
	newNode.Parent = -1;
	newNode.Left = -1;
	newNode.Right = -1;
	return newNode;
}
// Builds the node that joins myTree[i] (left child) and myTree[j] (right
// child). The result is marked with '*', has no parent yet, and its Count is
// the sum of both children's counts. The node is returned, not stored.
HaffmanNode createUnion(int i,int j) {
	HaffmanNode merged;
	merged.Letter = '*';
	merged.Count = myTree[i].Count + myTree[j].Count;
	merged.Parent = -1;
	merged.Left = i;
	merged.Right = j;
	return merged;
}
// Returns whichever of the two myTree indices has the strictly smaller Count;
// on a tie the result is b.
int Min(int a, int b) {
	return (myTree[a].Count < myTree[b].Count) ? a : b;
}
// Returns whichever of the two myTree indices has the larger Count; on a tie
// the result is a.
int Max(int a, int b) {
	return (myTree[a].Count < myTree[b].Count) ? b : a;
}
int findMin() {
	int n1 = 100000, n2 = 100000;
	int i;
	for (i = 0;i < Nums;i++) {
		if (myTree[i].Parent < 0) {
			n1 = i;
			break;//Fuck!之前不小心把这个break写在if的外面了
		}	
	}
	for (i = n1+1;i < Nums;i++) {
		if (myTree[i].Parent < 0) {
			n2 = i;
			break;
		}	
	}
	if (n1 == 100000 || n2 == 100000) {
		return 0;
	}else {
		int max = myTree[Max(n1,n2)].Count;
		for (i = n2 + 1;i < Nums;i++) {
			if (myTree[i].Parent < 0) {
				if (myTree[i].Count < max) {
					if (Max(n1, n2) == n1)n1 = i;
					else n2 = i;
					max = myTree[Max(n1, n2)].Count;
				}
			}

		}
		//现在得到了Count最小的两个坐标。
		HaffmanNode Union = createUnion(n1,n2);
		myTree[n1].Parent = Nums;
		myTree[n2].Parent = Nums;
		myTree.push_back(Union);
		Nums++;
		return 1;
	}

}
// Builds the tree by calling findMin() until it reports (by returning 0) that
// nothing is left to merge. The parameter N is not used.
void buildTree(int N) {
	int merged;
	do {
		merged = findMin();
		// PrintTree() can be called here to trace every merge step.
	} while (merged);
}
void PrintTree(void) {
	cout << endl;
	for (int i = 0;i < Nums;i++) {
		cout << i<<" "<< myTree[i].Parent << " " << myTree[i].Count << " "
			<< myTree[i].Letter << endl;
	}
	cout << endl;
}
int Length(int i,int depth) {
	if (myTree[i].Left == -1 && myTree[i].Right == -1) {
		return myTree[i].Count*depth;
	}
	else {
		return Length(myTree[i].Left,depth+1) + Length(myTree[i].Right, depth + 1);
	}
}
// Returns the total encoded length of the built tree: the sum of
// Count * depth over all leaves, measured from the root.
//
// The root is the last node in myTree without a parent. If there is no such
// node (empty tree) the result is 0; the index was previously left
// uninitialized in that case.
int getLength() {
	int root = -1;
	for (int i = 0;i < Nums;i++) {
		if (myTree[i].Parent < 0) {
			root = i;
		}
	}
	if (root < 0) {
		return 0;
	}
	return Length(root,0);
}
// *******************下面是检测用的数据和函数：********************************
// Node of the pointer-linked binary trie that generateTree() builds from one
// submission: following Left corresponds to a '0' in a code, Right to any
// other character.
struct HaffmanList {
	char Letter;
	int Count;                   // frequency stored at the node where a code ends
	HaffmanList *Left, *Right;   // children; null when absent
};
// Allocates one trie node with malloc and clears both child pointers.
// Letter and Count are left for the caller to fill in.
HaffmanList* createHaffmanList() {
	HaffmanList* fresh = static_cast<HaffmanList*>(malloc(sizeof(HaffmanList)));
	fresh->Right = nullptr;
	fresh->Left = nullptr;
	return fresh;
}
// Running count of the leaves visited by Length(HaffmanList*, int). The
// function only ever increments it, so callers reset it before a traversal.
int leafNum;

// Returns the sum of depth * Count over all leaves of the trie rooted at
// aList, where `depth` is the depth of aList itself, and adds one to leafNum
// for every leaf it visits. A null subtree contributes 0.
int Length(HaffmanList* aList,int depth) {
	if (aList == nullptr) {
		return 0;
	}
	HaffmanList* const left = aList->Left;
	HaffmanList* const right = aList->Right;
	if (left == nullptr && right == nullptr) {
		++leafNum;
		return depth * aList->Count;
	}
	int total = Length(left, depth + 1);
	total += Length(right, depth + 1);
	return total;
}
void generateTree(int N,int length) {

	HaffmanList* aList = createHaffmanList();
	for (int i = 0;i < N;i++) {	
		HaffmanList* NodeIps = aList;
		char le;
		cin >> le;
		string s;
		cin >> s;
		for (int i = 0;i < s.length();i++) {
			if (s[i] == '0') {
				if (NodeIps->Left == NULL) {
					HaffmanList* tempList = createHaffmanList();
					NodeIps->Left = tempList;
				}
				NodeIps = NodeIps->Left;
			}
			else {
				if (NodeIps->Right == NULL) {
					HaffmanList* tempList = createHaffmanList();
					NodeIps->Right = tempList;
				}
				NodeIps = NodeIps->Right;
			}
		}
		NodeIps->Count = myLetters[le];
	}
	//测试叶节点个数
	
	//测试编码量
	leafNum = 0;
	int Length2 = Length(aList,0);
	if (leafNum == N && Length2 == length) {
		cout << "Yes"<<endl;
	}
	else {
		cout << "No" << endl;
	}
}

测试结果如下：

首先是建树：我们把节点存放在结构体数组（myTree）里，结构体的Parent变量记录父节点在数组中的下标。

在合并的时候我们需要找到Count最小的两个节点，然后合并。要注意的是这两个节点的Parent必须是-1，否则说明已经被合并过了。

首先把n1和n2初始化为一个非常大的值（100000）。如果找完整个数组只找到一个Parent为-1的节点（此时n2仍是初始值），说明合并已经完成。

然后统计最优编码的总长度。PrintTree函数用来打印我们自己建立的树的信息。

然后进行检验，我们在每一行中读入的字符串，比如00101，第一个是0，我们往左建子树，第二个是0，还是往左建子树，然后往右建子树，以此类推。如果节点为NULL就创建新的节点，如果不为NULL就直接向下递进。建好树以后先统计叶子的数量：如果恰好为N，说明没有任何编码是另一个编码的前缀（也没有重复的编码）；然后再比较总编码长度是否等于最优长度，以判断是不是最优编码。

建立哈夫曼树这里用了两种结构，一个是链式，一个是数组式的。

其实建立哈夫曼树肯定还有思路更简单的方法，大家有兴趣可以自己尝试。

Dezeming

发布了174 篇原创文章 · 获赞 394 · 访问量 1万+

私信关注