哈夫曼编码是一种无损数据压缩算法:出现频率越高的字符使用越短的编码,从而像我们常用的压缩文件一样减少存储空间。
为了确保解码时不产生歧义,这里我们使用前缀码,即没有任何码字是其他码字的前缀。举个例子:
假设有四个字符a,b,c和d,它们对应的编码为00,01,0和1。如果压缩后的编码是0001,解码的时候就会产生歧义,可能的结果有“cccd”、“ccb”、“acd”、“cad”或“ab”。前缀码就能避免这类情况的发生。
构建哈夫曼编码的步骤:
这里我们采用优先队列也就是最小堆,请看https://blog.csdn.net/qq_36607792/article/details/81210729
1,首先为每一个字符创建一个节点。并构建所有节点的最小堆。
2,从最小堆中选取字符出现频率最小的两个节点。
3,新建一个节点存储这两个节点的频率之和,并把这两个节点作为新节点的左右子节点,再将新节点插入最小堆中。
4,重复2,3步骤,直到最小堆中只剩一个节点,这个节点就是哈夫曼树的根节点。
接下来我们用一个例子来理解算法
字符:a b c d e
频率: 3 7 2 8 1
1,创建节点,构造最小堆
2,选取频率最小的两个节点e和c
然后将它们合并得到的新节点(频率为1+2=3)插入到堆中
3,继续选取频率最小的两个节点:a(3)和上一步得到的新节点(3),合并成频率为6的新节点并插入堆中
4,选取频率为6的节点和b(7),合并成频率为13的新节点并插入堆中,
以此类推。
c++代码实现
#include <bits/stdc++.h>
using namespace std;
// A node of the Huffman tree. Pointers to these nodes are also what the
// min-heap stores while the tree is being built.
struct HeapNode
{
    char ch;          // symbol carried by the node (huffman() uses '$' for merged nodes)
    int freq;         // frequency of the symbol, or the sum of both children for a merged node
    HeapNode *left;   // left child, NULL for a leaf
    HeapNode *right;  // right child, NULL for a leaf
};
// Array-backed binary min-heap whose elements are pointers to tree nodes,
// ordered by each node's freq field.
struct minHeap
{
    int size;          // number of slots of `array` currently occupied
    int cap;           // number of slots allocated for `array`
    HeapNode **array;  // heap storage; the children of slot i sit at 2*i+1 and 2*i+2
};
typedef minHeap MinHeap;
// Allocates a childless tree node for symbol `ch` with frequency `freq`.
// The node comes from `new`; this function does not release it.
HeapNode *newNode(char ch,int freq)
{
    HeapNode *created = new HeapNode;
    created->left = NULL;
    created->right = NULL;
    created->freq = freq;
    created->ch = ch;
    return created;
}
// Allocates an empty heap with room for `cap` node pointers.
// The slots of the backing array are left uninitialised; `size` starts at 0.
MinHeap *createMinHeap(int cap)
{
    MinHeap *heap = new MinHeap;
    heap->array = new HeapNode*[cap];
    heap->size = 0;
    heap->cap = cap;
    return heap;
}
// Exchanges the two node pointers that `a` and `b` point at.
void swap(HeapNode **a,HeapNode **b)
{
    std::swap(*a, *b);
}
// Sifts the node at slot `i` down until neither of its children has a
// smaller freq, i.e. repairs the min-heap order below `i`.
// NOTE(review): like the usual sift-down, this presumably expects both child
// subtrees of `i` to already be valid heaps.
void heapify(MinHeap *h,int i)
{
    for(;;)
    {
        const int leftChild = 2*i+1;
        const int rightChild = 2*i+2;
        int best = i;
        if(leftChild < h->size && h->array[leftChild]->freq < h->array[best]->freq)
            best = leftChild;
        if(rightChild < h->size && h->array[rightChild]->freq < h->array[best]->freq)
            best = rightChild;
        if(best == i)
            return;  // slot i already holds the smallest of the three
        swap(&h->array[best],&h->array[i]);
        i = best;    // keep sifting from the slot the node moved into
    }
}
// Removes and returns the node with the smallest freq (the heap root).
// Returns NULL when `h` is NULL or the heap holds no nodes; the heap is left
// untouched in that case. Previously an empty heap read array[0] and
// array[-1] and drove `size` negative.
HeapNode *extractMin(MinHeap *h)
{
    if(h == NULL || h->size <= 0)
        return NULL;
    HeapNode *node = h->array[0];
    // Move the last occupied slot to the root, shrink, then sift it down.
    h->array[0] = h->array[h->size-1];
    h->size--;
    heapify(h,0);
    return node;
}
// Inserts `node` into the heap, keeping the smallest freq at the root.
// When the heap is already at capacity the node is NOT inserted: a message
// (Chinese for "full") is printed to stdout and the function returns.
void insertMinHeap(MinHeap *h,HeapNode *node)
{
    if(h->size == h->cap)
    {
        std::cout << "满了" << std::endl;
        return;
    }
    // Open a hole at the end and shift larger ancestors down into it until
    // the right position for `node` is found.
    int hole = h->size;
    h->size = hole+1;
    while(hole != 0)
    {
        const int parent = (hole-1)/2;
        if(h->array[parent]->freq <= node->freq)
            break;
        h->array[hole] = h->array[parent];
        hole = parent;
    }
    h->array[hole] = node;
}
// Turns the first `size` slots of the heap array into a valid min-heap by
// sifting down every slot that can have children, from the last such slot
// back to the root.
void buildMinHeap(MinHeap *h)
{
    for(int remaining = h->size/2; remaining > 0; remaining--)
        heapify(h,remaining-1);
}
// Creates a heap of capacity `n`, fills it with one leaf node per
// (ch[k], freq[k]) pair and arranges those nodes into min-heap order.
// Both arrays are read at indices 0..n-1.
MinHeap *createAndBuildHeap(char *ch,int *freq,int n)
{
    MinHeap *heap = createMinHeap(n);
    int filled = 0;
    while(filled < n)
    {
        heap->array[filled] = newNode(ch[filled],freq[filled]);
        filled++;
    }
    heap->size = n;
    buildMinHeap(heap);
    return heap;
}
// Builds the Huffman tree for the `n` symbols in `ch` with frequencies `freq`
// and returns its root.
//
// Returns NULL when there is nothing to encode (`n <= 0` or a NULL input
// array). Previously n == 0 made the merge loop extract from an empty heap.
// The temporary heap and its slot array are released before returning; the
// tree nodes themselves are not freed here and are handed to the caller.
HeapNode *huffman(char *ch,int *freq,int n)
{
    if(ch == NULL || freq == NULL || n <= 0)
        return NULL;
    MinHeap *h = createAndBuildHeap(ch,freq,n);
    // Repeatedly merge the two least frequent nodes under a new parent whose
    // frequency is their sum, until a single node (the root) remains.
    while(h->size > 1)
    {
        HeapNode *left = extractMin(h);
        HeapNode *right = extractMin(h);
        HeapNode *top = newNode('$',left->freq+right->freq);  // '$' marks a merged (non-symbol) node
        top->left = left;
        top->right = right;
        insertMinHeap(h,top);
    }
    HeapNode *root = extractMin(h);
    // The heap was only scaffolding: free its array (new[]) and the struct (new).
    delete[] h->array;
    delete h;
    return root;
}
// Prints one line "<symbol>:<bits>" to stdout for every leaf below `node`.
//
// node  - root of the (sub)tree to print; NULL is accepted and prints nothing.
// res   - scratch buffer holding the bits of the path walked so far; it must
//         have room for the depth of the tree.
// index - number of valid bits currently in `res` (0 for the initial call).
//
// Going left appends 0 to the path, going right appends 1. A tree consisting
// of a single leaf has an empty path, so its symbol is printed with the code
// "0" instead of an empty code.
void printCode(HeapNode *node,int *res,int index)
{
    if(node == NULL)
        return;
    if(!node->left && !node->right)
    {
        cout << node->ch << ":";
        if(index == 0)
            cout << 0;  // lone root leaf: give the only symbol a one-bit code
        for(int i = 0; i < index; i++)
            cout << res[i];
        cout << endl;
        return;
    }
    if(node->left)
    {
        res[index] = 0;
        printCode(node->left,res,index+1);
    }
    if(node->right)
    {
        res[index] = 1;
        printCode(node->right,res,index+1);
    }
}
int main(void)
{
int n = 5;
char ch[] = {'a','b','c','d','e'};
int freq[] = {3,7,2,8,1};
HeapNode *node = huffman(ch,freq,n);
int res[100],index = 0;
printCode(node,res,index);
return 0;
}