Huffman编码的原理:
以字符的使用频率作为权构建一棵哈夫曼树,然后利用哈夫曼树对字符进行编码。构造一棵哈夫曼树,是将所要编码的字符作为叶子结点,该字符在文件中的使用频率作为叶子结点的权值,以“自底向上”的方式,通过n-1次“合并”运算后构造出的一棵树。
核心思想:权值越大的叶子离根越远。
贪心策略:每次从树的集合中取出没有双亲且权值最小的两棵树作为左右子树。
先构建一棵Huffman树:
(1)先找到权值最小且没有父亲的两个结点进行合并,创建一棵树,然后循环创建。
设置结构体:
typedef struct{
double weight;
int parent;
int lchild;
int rchild;
char value;
} HNodeType;//节点结构体
typedef struct{
int bit[MAXBIT];
int start;//反向存储,正向输出
} HcodeType; //编码结构体
HNodeType HuffNode[MAXNODE];
HcodeType HuffCode[MAXLEAF];
初始化:
int i,j;
int x1, x2;//两个最小权值结点在数组中的序号
double m1, m2;//两个最小权值结点的权值
//初始化哈夫曼数组中的结点
for(i = 0; i <= 2*n - 1; ++ i)
{
HuffNode[i].weight = 0;//权值
HuffNode[i].parent = -1;
HuffNode[i].lchild = -1;
HuffNode[i].rchild = -1;
}
//输入n个叶子结点的名称和权值
for(i = 0; i < n; ++i)
{
cout << "please input value and weight of leaf node:" << i +1;
cin >> HuffNode[i].value >> HuffNode[i].weight;
}
构造Huffman树
//构造Huffman树(核心)
for(i = 0 ; i < n-1; ++i)
{
m1 = m2 = MAXVALUE;//初始化为最大值;
x1 = x2 = -1;
//找出所有结点中权值最小且无父亲的两个结点,并合并成一棵二叉树
for(j = 0 ; j < n+i; ++j)
{
if(m1 > HuffNode[j].weight && HuffNode[j].parent == -1)
{
m2 = m1;
x2 = x1;
m1 = HuffNode[j].weight;
x1 = j;
}
else if(m2 > HuffNode[j].weight && HuffNode[j].parent == -1)
{
m2 = HuffNode[j].weight;
x2 = j;
}
}
//设置找到的两个结点的父节点的信息
HuffNode[n+i].weight = m1 + m2;
HuffNode[n+i].lchild = x1;
HuffNode[n+i].rchild = x2;
HuffNode[x1].parent = n+i;
HuffNode[x2].parent = n+i;
cout << "x1.weight and x2.weight in round" << i+1 <<"\t"
<< HuffNode[x1].weight << "\t" << HuffNode[x2].weight << endl;
}
2. 输出哈夫曼编码
void HuffmanCode(HcodeType HuffCode[MAXLEAF], int n)
{
HcodeType cd;//定义一个临时变量来存放求解编码时的信息
int i,j,c,p;
for(i = 0 ; i < n; ++ i)
{
cd.start = n-1;
c = i;//c的编号
p = HuffNode[c].parent;//c的父亲结点的编号
while(p != -1)
{
if(HuffNode[p].lchild == c)
cd.bit[cd.start] = 0;
else if(HuffNode[p].rchild == c)
cd.bit[cd.start] = 1;
cd.start --;
c = p;
p = HuffNode[p].parent;
}
//把叶子结点的编码信息从临时编码cd复制出来,放入编码结构体数组
for(j = cd.start+1; j < n; ++ j)
HuffCode[i].bit[j] = cd.bit[j];
HuffCode[i].start = cd.start;
}
}
合并后的代码:
#include<iostream>
using namespace std;
#define MAXBIT 100
#define MAXVALUE 10000
#define MAXLEAF 30
#define MAXNODE MAXLEAF*2-1
typedef struct{
double weight;
int parent;
int lchild;
int rchild;
char value;
} HNodeType;//节点结构体
typedef struct{
int bit[MAXBIT];
int start;//反向存储,正向输出
} HcodeType; //编码结构体
HNodeType HuffNode[MAXNODE];
HcodeType HuffCode[MAXLEAF];
/* 构造哈夫曼树:先找到权值最小且无父亲的两个节点,合并成新的节点*/
void HuffmanTree(HNodeType HuffNode[MAXNODE], int n)
{
int i,j;
int x1, x2;//两个最小权值结点在数组中的序号
double m1, m2;//两个最小权值结点的权值
//初始化哈夫曼数组中的结点
for(i = 0; i <= 2*n - 1; ++ i)
{
HuffNode[i].weight = 0;//权值
HuffNode[i].parent = -1;
HuffNode[i].lchild = -1;
HuffNode[i].rchild = -1;
}
//输入n个叶子结点的名称和权值
for(i = 0; i < n; ++i)
{
cout << "please input value and weight of leaf node:" << i +1;
cin >> HuffNode[i].value >> HuffNode[i].weight;
}
//构造Huffman树(核心)
for(i = 0 ; i < n-1; ++i)
{
m1 = m2 = MAXVALUE;//初始化为最大值;
x1 = x2 = -1;
//找出所有结点中权值最小且无父亲的两个结点,并合并成一棵二叉树
for(j = 0 ; j < n+i; ++j)
{
if(m1 > HuffNode[j].weight && HuffNode[j].parent == -1)
{
m2 = m1;
x2 = x1;
m1 = HuffNode[j].weight;
x1 = j;
}
else if(m2 > HuffNode[j].weight && HuffNode[j].parent == -1)
{
m2 = HuffNode[j].weight;
x2 = j;
}
}
//设置找到的两个结点的父节点的信息
HuffNode[n+i].weight = m1 + m2;
HuffNode[n+i].lchild = x1;
HuffNode[n+i].rchild = x2;
HuffNode[x1].parent = n+i;
HuffNode[x2].parent = n+i;
cout << "x1.weight and x2.weight in round" << i+1 <<"\t"
<< HuffNode[x1].weight << "\t" << HuffNode[x2].weight << endl;
}
}
/*哈夫曼编码:从叶子结点开始,查看其是否有父亲结点,如果有,查看是其父亲结点的
左孩子还是右孩子,左孩子编码为0,右孩子编码为1*/
void HuffmanCode(HcodeType HuffCode[MAXLEAF], int n)
{
HcodeType cd;//定义一个临时变量来存放求解编码时的信息
int i,j,c,p;
for(i = 0 ; i < n; ++ i)
{
cd.start = n-1;
c = i;//c的编号
p = HuffNode[c].parent;//c的父亲结点的编号
while(p != -1)
{
if(HuffNode[p].lchild == c)
cd.bit[cd.start] = 0;
else if(HuffNode[p].rchild == c)
cd.bit[cd.start] = 1;
cd.start --;
c = p;
p = HuffNode[p].parent;
}
//把叶子结点的编码信息从临时编码cd复制出来,放入编码结构体数组
for(j = cd.start+1; j < n; ++ j)
HuffCode[i].bit[j] = cd.bit[j];
HuffCode[i].start = cd.start;
}
}
int main()
{
int i,j,n;
cout << "please input n: " << endl;
cin >> n;
HuffmanTree(HuffNode,n);//构造哈夫曼树
HuffmanCode(HuffCode,n);//哈夫曼树编码
//输出已保存好的所有存在编码的哈夫曼编码(叶子节点编码)
for(i = 0; i < n; ++i)
{
cout << HuffNode[i].value << ":Huffman code is:";
for(j = HuffCode[i].start+1; j < n; ++j)
cout << HuffCode[i].bit[j];
cout << endl;
}
return 0;
}