哈夫曼编码,huffman的原理及实现

哈夫曼编码是一种无损数据压缩算法:出现频率越高的字符使用越短的编码,出现频率越低的字符使用越长的编码。与我们常用的压缩文件类似,它能够减少数据占用的存储空间。

为了确保解码时不产生歧义,这里我们使用前缀码,即没有任何码字是其他码字的前缀。举个例子:

假设有四个字符a,b,c和d,它们对应的编码为00,01,0和1。这组编码不是前缀码,因为c的编码0是a和b的编码的前缀。如果压缩后的编码是0001,解码的时候就会产生歧义,可能的结果有“cccd”、“ccb”、“cad”、“acd”和“ab”。前缀码就能避免这类情况的发生。

构建哈夫曼编码的步骤:

这里我们采用优先队列(即最小堆)来实现,最小堆的介绍请看https://blog.csdn.net/qq_36607792/article/details/81210729

1,首先为每一个字符创建一个节点。并构建所有节点的最小堆。

2,从最小堆中选取字符出现频率最小的两个节点。

3,新建一个节点存储这两个节点的频率之和,并将这两个节点分别作为新节点的左、右子节点,再将新节点插入最小堆中。

4,重复步骤2和3,直到最小堆中只剩一个节点,该节点即为哈夫曼树的根节点。

接下来我们用一个例子来理解算法

 字符:a    b   c   d   e

频率: 3    7   2   8   1

1,创建节点,构造最小堆

2,选取频率最小的两个节点e和c

然后新建一个频率为3(即1+2)的节点,以e和c作为它的左右子节点,并将这个新节点插入到堆中

3,继续选取频率最小的两个节点:a(频率3)和上一步新建的节点(频率3),合并成频率为6的新节点并插入堆中

4,再选取频率最小的两个节点:频率为6的节点和b(频率7),合并成频率为13的新节点并插入堆中

以此类推,直到堆中只剩下一个节点,即频率为21(所有字符频率之和)的根节点。

C++代码实现

#include <bits/stdc++.h>

using namespace std;

// One node of the Huffman tree. The same nodes are also the elements stored
// (by pointer) in the min-heap while the tree is being built.
struct HeapNode
{
    char ch;          // symbol stored in this node
    int freq;         // frequency used as the heap ordering key
    HeapNode *left;   // left child, NULL when absent
    HeapNode *right;  // right child, NULL when absent
};

// Array-backed binary min-heap of HeapNode pointers, ordered by node->freq.
struct minHeap
{
    int size;          // number of slots currently in use
    int cap;           // number of slots allocated in `array`
    HeapNode **array;  // heap storage; children of slot i live at 2*i+1 and 2*i+2
};

// Alias used by the rest of the file.
typedef minHeap MinHeap;

// Allocates a childless tree node holding the given symbol and frequency.
// The node is created with `new`; the caller receives the raw pointer.
HeapNode *newNode(char ch,int freq)
{
    HeapNode *fresh = new HeapNode;

    // A new node starts out as a leaf.
    fresh->left = NULL;
    fresh->right = NULL;

    fresh->freq = freq;
    fresh->ch = ch;
    return fresh;
}

// Allocates an empty min-heap with room for `cap` node pointers.
// Both the heap object and its slot array are created with `new`.
MinHeap *createMinHeap(int cap)
{
    MinHeap *heap = new MinHeap;
    heap->size = 0;      // nothing stored yet
    heap->cap = cap;
    heap->array = new HeapNode*[cap];
    return heap;
}

// Exchanges the two node pointers that `a` and `b` point at.
void swap(HeapNode **a,HeapNode **b)
{
    HeapNode *held = *b;
    *b = *a;
    *a = held;
}

// Sifts the element at slot `i` down until neither child has a smaller
// frequency, restoring the min-heap property for the subtree rooted at `i`.
void heapify(MinHeap *h,int i)
{
    for(;;)
    {
        const int lhs = 2*i+1;
        const int rhs = 2*i+2;
        int best = i;

        // Pick the smallest of the current slot and its in-range children.
        if(lhs < h->size && h->array[lhs]->freq < h->array[best]->freq)
            best = lhs;
        if(rhs < h->size && h->array[rhs]->freq < h->array[best]->freq)
            best = rhs;

        // The current slot already holds the minimum: nothing left to fix.
        if(best == i)
            return;

        swap(&h->array[best],&h->array[i]);
        i = best;   // continue sifting from the child we swapped with
    }
}

// Removes and returns the node with the smallest frequency (the heap root).
//
// Returns NULL when `h` is NULL or the heap holds no elements; in that case
// the heap is left untouched. Without this guard an empty heap would read
// array[-1] and drive `size` negative.
HeapNode *extractMin(MinHeap *h)
{
    if(h == NULL || h->size <= 0)
        return NULL;

    HeapNode *node = h->array[0];

    // Move the last element to the root, shrink the heap, then sift it down.
    h->size--;
    h->array[0] = h->array[h->size];
    heapify(h,0);
    return node;
}

// Inserts `node` into the heap, keeping it ordered by frequency.
// When the heap is already at capacity a message is printed to stdout and
// the node is NOT inserted.
void insertMinHeap(MinHeap *h,HeapNode *node)
{
    if(h->size == h->cap)
    {
        std::cout << "满了" << std::endl;
        return;
    }

    // Open a hole at the end and bubble it up past every parent whose
    // frequency is larger than the new node's.
    int slot = h->size;
    h->size = slot+1;
    while(slot != 0)
    {
        const int parent = (slot-1)/2;
        if(h->array[parent]->freq <= node->freq)
            break;
        h->array[slot] = h->array[parent];
        slot = parent;
    }
    h->array[slot] = node;
}

// Turns the first `size` slots of the array into a valid min-heap by
// sifting down every slot from size/2-1 back to the root.
void buildMinHeap(MinHeap *h)
{
    int i = h->size/2;
    while(i-- > 0)
        heapify(h,i);
}

// Creates a heap of capacity `n`, fills it with one leaf node per
// (ch[i], freq[i]) pair and heapifies it.
// `ch` and `freq` are read at indices 0..n-1.
MinHeap *createAndBuildHeap(char *ch,int *freq,int n)
{
    MinHeap *heap = createMinHeap(n);
    heap->size = n;

    int idx = 0;
    while(idx < n)
    {
        heap->array[idx] = newNode(ch[idx],freq[idx]);
        ++idx;
    }

    buildMinHeap(heap);
    return heap;
}

// Builds the Huffman tree for the `n` symbols in `ch` with frequencies `freq`
// and returns its root.
//
// Parameters:
//   ch   - symbols, read at indices 0..n-1
//   freq - frequency of each symbol, parallel to `ch`
//   n    - number of symbols
//
// Returns the root of the tree, or NULL when there is nothing to encode
// (n <= 0 or a NULL input array). Internal nodes carry the placeholder
// symbol '$' and the sum of their children's frequencies. The tree nodes are
// allocated with `new` and are owned by the caller.
HeapNode *huffman(char *ch,int *freq,int n)
{
    // With no symbols the merge loop below would run on an empty heap
    // and never reach size == 1.
    if(n <= 0 || ch == NULL || freq == NULL)
        return NULL;

    MinHeap *h = createAndBuildHeap(ch,freq,n);

    // Repeatedly merge the two least frequent nodes under a new parent.
    // Each round removes two nodes and adds one, so the heap always has room.
    while(h->size > 1)
    {
        HeapNode *left = extractMin(h);
        HeapNode *right = extractMin(h);
        HeapNode *top = newNode('$',left->freq+right->freq);
        top->left = left;
        top->right = right;
        insertMinHeap(h,top);
    }

    HeapNode *root = extractMin(h);

    // The heap was only scaffolding: release it (but not the tree nodes).
    delete[] h->array;
    delete h;

    return root;
}

// Prints the code of every leaf below `node` to stdout, one "<symbol>:<bits>"
// line per leaf, visiting left subtrees (bit 0) before right subtrees (bit 1).
//
// Parameters:
//   node  - root of the (sub)tree to print; NULL prints nothing
//   res   - scratch buffer holding the bits on the path from the root; it
//           must have room for one entry per tree level
//   index - number of bits already stored in `res` (0 for the root)
void printCode(HeapNode *node,int *res,int index)
{
    if(node == NULL)
        return;

    if(!node->left && !node->right)
    {
        // A tree consisting of a single leaf would otherwise get an empty,
        // undecodable code; give the lone symbol the one-bit code "0".
        if(index == 0)
            res[index++] = 0;

        std::cout << node->ch << ":";
        for(int i = 0; i < index; i++)
            std::cout << res[i];
        std::cout << '\n';
        return;
    }

    if(node->left)
    {
        res[index] = 0;
        printCode(node->left,res,index+1);
    }

    if(node->right)
    {
        res[index] = 1;
        printCode(node->right,res,index+1);
    }
}

int main(void)
{

    int n = 5;
    char ch[] = {'a','b','c','d','e'};
    int freq[] = {3,7,2,8,1};

    HeapNode *node = huffman(ch,freq,n);

    int res[100],index = 0;
    printCode(node,res,index);

    return 0;
}

猜你喜欢

转载自blog.csdn.net/qq_36607792/article/details/81227950
今日推荐