哈夫曼编码的简单实现(c语言)

  哈夫曼编码是一种被广泛应用而且非常有效的无损数据压缩技术,它是一种特殊类型的前缀编码,并且是变长编码方式。哈夫曼编码是David A. Huffman在读博士时开发的算法。作为麻省理工学院的学生,他于1952年发表题为“构建最小冗余码的方法”的论文。尽管哈夫曼编码这几个字不常出现在我们的日常生活中,但是它与LZ77共同组成的DEFLATE压缩算法被zip压缩文件所使用,而zip压缩文件在生活中的许多地方起到了非常重要的作用。无论是Mac OS,Unix还是Windows系统都对zip压缩文件有原生的支持。数据包在网络中的传输便使用了zip压缩算法。当今使用广泛的PNG,JPEG,WebP图像格式,所使用的压缩算法也包含了哈夫曼编码方法。哈夫曼编码给我们提供了一个简单有效的压缩数据的方式,在现实中使用广泛。

  以下是具体代码。

//coder.cpp
#include <stdlib.h>
#include <stdio.h>
#include "Coder.h"

/**
 * Build a Huffman tree and the per-symbol code table.
 *
 * Both outputs are 1-based arrays whose slot 0 is reused for bookkeeping:
 *   - HT has 2*n - 1 nodes in HT[1..2n-1]; leaves are HT[1..n] in the order
 *     of w, internal nodes follow, and the root is HT[2n-1].
 *     HT[0].weight stores the node count.
 *   - HC[i] (1 <= i <= n) is a malloc'ed NUL-terminated string of '0'/'1'
 *     for the i-th symbol; HC[0] stores n cast to a pointer.
 *
 * HT     [out] receives the tree array
 * HC     [out] receives the code table
 * w      weights of the n symbols (w[0..n-1])
 * n      number of symbols to encode
 *
 * If n <= 1 the function returns without touching HT or HC.
 * If any allocation fails, everything allocated so far is released and
 * both HT and HC are set to null.
 */
void BuildHuffmanTree(  HuffmanTree     &HT,    // Huffman tree
                        HuffmanCode     &HC,    // Huffman code table
                        unsigned int    *w,     // symbol weights
                        unsigned int    n       // number of symbols
)
{
    if (n <= 1) return;
    unsigned int m = 2 * n - 1;     // total number of nodes in the tree

    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));     // slot 0 is not a node
    if (!HT) {
        HC = 0;
        return;
    }
    HT->weight = m;

    unsigned int i;

    // Initial state: n weighted leaves followed by n-1 empty internal nodes,
    // none of them attached to a parent yet.
    for (i = 1; i <= m; ++i) {
        HT[i].lChild = 0;
        HT[i].rChild = 0;
        HT[i].parent = 0;
        HT[i].weight = (i <= n) ? w[i - 1] : 0;
    }

    // Repeatedly join the two lightest parentless nodes under a new node i.
    for (i = n + 1; i <= m; ++i) {
        unsigned int s1, s2;
        // Select() only examines indices strictly below its bound, so pass i
        // (not i - 1): every node in HT[1..i-1], including the internal node
        // created by the previous iteration, must be a candidate.
        Select(HT, i, s1, s2);
        HT[s1].parent = HT[s2].parent = i;

        HT[i].lChild = s1;
        HT[i].rChild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    //---------- derive each code by walking from the leaf up to the root ----------
    HC = (HuffmanCode)malloc((n + 1) * sizeof(char *));     // slot 0 is not a code
    char *cd = (char *)malloc(n * sizeof(char));            // scratch: longest code is n-1 chars + NUL
    if (!HC || !cd) {
        free(cd);
        free(HC);
        HC = 0;
        free(HT);
        HT = 0;
        return;
    }

    HC[0] = (char *)(size_t)n;      // remember how many codes were allocated

    cd[n - 1] = '\0';
    for (i = 1; i <= n; ++i) {
        unsigned int start = n - 1;     // position of the terminator; the code grows leftwards from here
        for (unsigned int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
            cd[--start] = (HT[f].lChild == c) ? '0' : '1';
        }

        HC[i] = (char *)malloc((n - start) * sizeof(char));
        if (!HC[i]) {
            for (unsigned int k = 1; k < i; ++k)
                free(HC[k]);
            free(HC);
            HC = 0;
            free(cd);
            free(HT);
            HT = 0;
            return;
        }
        char *d = cd + start;
        strcpy(HC[i], d);
    }
    free(cd);
}

/*********************************************************************
Pick the two parentless nodes with the smallest weights.

The first parentless node found scanning upward from HT[1] is the initial
candidate; nodes after it with index strictly below i are then compared
against it. s1 receives the lighter node and s2 the next one, so
weight(s1) <= weight(s2). The parent fields of both picks are reset to 0
before returning, so HT is left unchanged.

Note: the "first parentless node" scan itself is not limited by i.
*********************************************************************/
void Select(HuffmanTree     &HT,    // tree to search
            unsigned int    i,      // exclusive upper bound for the comparison scan
            unsigned int    &s1,    // [out] index of the smallest node
            unsigned int    &s2     // [out] index of the second smallest node
)
{
    unsigned int *out[2] = { &s1, &s2 };

    // Start from the first node that has no parent.
    unsigned int best = 1;
    while (HT[best].parent != 0)
        ++best;

    for (int pass = 0; pass < 2; ++pass) {
        unsigned int cand = best + 1;
        while (cand < i) {
            if (HT[cand].parent == 0 && HT[cand].weight < HT[best].weight)
                best = cand;
            ++cand;
        }

        // Temporarily mark the pick as "taken" (any non-zero value works)
        // so that the next pass cannot choose it again.
        HT[best].parent = 1;
        *out[pass] = best;

        // Restart from the first remaining parentless node.
        best = 1;
        while (HT[best].parent != 0)
            ++best;
    }

    // Undo the temporary marks.
    HT[s2].parent = 0;
    HT[s1].parent = 0;
}

// Copy the NUL-terminated string at sou into the buffer at des.
// Returns the number of characters written, including the terminating NUL.
// Neither pointer is modified even though both are taken by reference.
size_t strcpy(  char    *& des,     // destination buffer
                char    *& sou      // source string
)
{
    char *dst = des;
    const char *src = sou;
    size_t copied = 0;
    char ch;

    do {
        ch = src[copied];
        dst[copied] = ch;
        ++copied;
    } while (ch != '\0');

    return copied;
}
/*******************************
Walk the Huffman tree from node n, consuming one '0'/'1' character of s
per step ('0' = left child, '1' = right child) until a leaf is reached.

s is advanced past every character consumed, including an illegal one.
Returns the index of the leaf reached, or ERROR if the input ends
('\0' or '\n') before a leaf is reached or contains any other character.
*******************************/
unsigned int SearchTree(    HuffmanTree &HT,    // Huffman tree
                            char        *&s,    // encoded text; advanced as it is consumed
                            unsigned int n      // index of the node to start from
)
{
    for (;;) {
        // A node with no children is a leaf: decoding of this symbol is done.
        if (HT[n].lChild + HT[n].rChild == 0)
            return n;

        const char bit = *s;
        if (bit == '\0' || bit == '\n') {
            // Input ran out in the middle of a code.
            fprintf(stderr, "\n输入的编码不完整,请检查输入是否正确。\n");
            return ERROR;
        }

        ++s;

        if (bit == '0') {
            n = HT[n].lChild;
        }
        else if (bit == '1') {
            n = HT[n].rChild;
        }
        else {
            fprintf(stderr, "\n发现非法编码值。\n");
            return ERROR;
        }
    }
}
/*
Release the memory allocated while building the Huffman tree.

HC[0] holds the number of code strings (stored as a pointer-sized integer),
HC[1..count] are the individually allocated code strings. Both HT and HC
are set to null afterwards. A null HC or HT is accepted.
*/
void DestroyTree(HuffmanTree &HT, HuffmanCode &HC)
{
    if (HC) {
        // Read the count back through size_t: casting a pointer to int
        // truncates (and is rejected by C++ compilers) on 64-bit targets.
        size_t count = (size_t)HC[0];
        for (size_t k = 1; k <= count; k++)
            free(HC[k]);
        free(HC);
        HC = 0;
    }

    free(HT);
    HT = 0;
}
//coder.h
#pragma once
#include <stdlib.h>

// Failure value returned by SearchTree.
#define ERROR (0)

// One node of the Huffman tree, stored in an array and linked by index.
// An index of 0 means "none" for parent, lChild and rChild.
typedef struct {
    unsigned int    weight;     // weight of the node
    unsigned int    parent;     // index of the parent node
    unsigned int    lChild;     // index of the left child
    unsigned int    rChild;     // index of the right child
} HTNode, *HuffmanTree;

// Table of code strings, one char* per symbol.
typedef char ** HuffmanCode;

// Build the Huffman tree HT and code table HC from the n weights in w.
void BuildHuffmanTree(HuffmanTree &HT, HuffmanCode &HC, unsigned int * w, unsigned int n);
// Find the two parentless nodes of smallest weight and return their indices in s1 and s2.
void Select(HuffmanTree & HT, unsigned int i, unsigned int &s1, unsigned int &s2);
// Copy the string sou into des; returns the number of characters copied including the NUL.
size_t strcpy(char* &des, char* &sou);
// Follow the '0'/'1' characters of s down the tree from node n; returns the leaf index or ERROR.
unsigned int SearchTree(HuffmanTree &HT, char *&s, unsigned int n);
// Free the tree and the code table and reset both to null.
void DestroyTree(HuffmanTree &HT, HuffmanCode &HC);
// Huffman compress.cpp: 定义控制台应用程序的入口点。
//

#include <stdlib.h>
#include <stdio.h>
#include "Coder.h"


int main()
{
    const int bufferSize = 1000;
    unsigned int maxSize = 100;
    unsigned int    n = 0, n1 = 0,              //待编码的字符数量
        *d = 0;         //字符的权值数组
    int             weight;                     //权值
    char            *data = 0,      //待编码字符数组
        buf[bufferSize];            //控制台读入字符串缓冲区



    d = (unsigned int *)malloc(sizeof(unsigned int) * maxSize);
    data = (char *)malloc(sizeof(char) * maxSize);

    system("title 哈夫曼编码译码器演示程序");
    printf("\n\t\t哈夫曼编码译码器\n本程序仅支持对单个字符的编码,且字符数量限制为100个以内.\n\n"
        "请输入字符及其对应的正整数权值,单独一个回车行结束整个输入。\n例如:\nA 2\nB 3 C 4 d 5\ne 6 \n\n--------------------\n");

    //从控制台窗口接受字符及其对应的权值
    for (;;) {
        char c;
        char s[2];
        int match_len;      //匹配的个数

        putc('>', stdout);
        fgets(buf, 20, stdin);
        if ('\n' == buf[0])
            if (n <= 1) {
                printf("请至少输入两组数据。\n");
                continue;
            }
            else
                break;

        unsigned int in = 0, ind;
        while ((match_len = sscanf(buf + in, "%1s %d%n", &s, &weight, &ind)) == 2 && s[0] > ' ' && weight > 0) {
            c = s[0];
            in += ind;

            int i = 0;
            while (data[i] != c && data[i]) i++;
            if (data[i]) {
                fprintf(stdout, "\n一个字符仅可有一个权值!\n");
                buf[in] = 'Z';
                break;
            }
            d[n] = (unsigned int)weight;

            data[n] = c;
            if (++n >= maxSize) {
                maxSize += 100;
                void * p = 0, *q = 0;
                p = realloc(d, maxSize);
                q = realloc(data, maxSize);
                if (!p && !q) {
                    d = (unsigned int *)p;
                    data = (char *)q;
                }
                else {

                    fprintf(stdout, "\n输入达上限!\n");
                    system("pause");
                    return 0;
                }
            }

        }
        unsigned int ii = in;
        while (buf[ii] <= 32 && buf[ii] > 0)ii++;

        if (ii == 0 || buf[ii] != '\0') { n = n1; printf("%s", "输入错误,请重新输入。\n"); continue; }
        n1 = n;
    }


    HuffmanTree HT;
    HuffmanCode HC;
    //任务一  构造哈夫曼树
    BuildHuffmanTree(HT, HC, d, n);

    //任务二  输出哈夫曼编码
    printf("\n%s\t%s\n", "字符", "哈夫曼编码");
    for (unsigned int i = 1; i <= n; i++) {
        printf("%c\t%s\n", data[i - 1], HC[i]);
    }
    putc('\n', stdout);

    //任务三  翻译哈夫曼编码
    //char  buf2[1001];
    char    *p = buf;
    char    *q = buf;

    unsigned int        result = 1;

    for (;;) {
        printf("\n请输入待译码的字符串:\n>");
        fgets(buf, bufferSize, stdin);

        if ('\n' == buf[0]){
            char c[22];
            printf("确定要退出吗?(y/n)");
            fgets(c, 20, stdin);
            if(c[0] == 'y')
                break;
            c[0] = '\0';
            continue;
        }
        //putc('>', stdout);
        while (p - buf < bufferSize + 1) {
            result = SearchTree(HT, p, 2 * n - 1);
            if (result != ERROR) {
                *q++ = data[result - 1];
                //printf("%c", data[result - 1]);
                /*if (*p == '\n') {
                    putc('\n\n', stdout);
                    break;
                }
                */
                if (*p == '\n') {
                    *q = '\0';
                    printf("原字符串为:\n%s\n\n", buf);
                    break;
                }
            }
            else break;
        }

        p = q = buf;
    }

    DestroyTree(HT, HC);

    system("pause");
    return 0;
}

参考资料:

[1] 严蔚敏,吴伟民.数据结构.北京:清华大学出版社,2008
[2] 苏仕华.数据结构课程设计.北京:机械工业出版社,2010
[3] Brian W. Kernighan,Dennis M. Ritchie著,徐宝文,李志 译,C程序设计语言.北京:机械工业出版社,2004
[4] https://en.wikipedia.org/wiki/Huffman_coding

猜你喜欢

转载自www.cnblogs.com/imagineAct/p/11258646.html