Simple Huffman coding (c language)

  Huffman coding is a widely used and very efficient lossless data compression technique; it is a variable-length prefix code. The algorithm was developed by David A. Huffman while he was a PhD student at MIT, and he published it in 1952 in the paper "A Method for the Construction of Minimum-Redundancy Codes". Although the words "Huffman coding" do not often come up in daily life, together with LZ77 it makes up the DEFLATE compression algorithm used by zip files, and zip files play a very important role in many places. Mac OS, Unix and Windows all have native support for zip files, and data transmitted over networks is often compressed with the same algorithm. The widely used PNG, JPEG and WebP image formats also include Huffman coding in their compression schemes. Huffman coding provides a simple and effective way to compress data and is widely used in the real world.

  The complete code follows.

//coder.cpp
#include <stdlib.h>
#include <stdio.h>
#include "Coder.h"

/*
 * BuildHuffmanTree - build a Huffman tree and the code string of every symbol.
 *
 * HT  out: array of 2n node slots. HT[1..n] are the leaves (one per symbol,
 *     in the order of w), HT[n+1..2n-1] are the internal nodes and HT[2n-1]
 *     is the root. Slot 0 is not a node; its weight field stores the node
 *     count 2n-1.
 * HC  out: array of n+1 pointers. HC[i] (1 <= i <= n) is a malloc'ed,
 *     NUL-terminated string of '0'/'1' characters for symbol i ('0' = left
 *     branch, '1' = right branch, root first). HC[0] is not a string: it
 *     carries n converted to a pointer so that DestroyTree knows how many
 *     strings to free.
 * w   weights of the n symbols, w[0..n-1].
 * n   number of symbols to encode.
 *
 * When n <= 1 the function returns at once and leaves HT and HC untouched.
 * When an allocation fails, everything allocated so far is released and both
 * HT and HC are set to 0.
 */
void BuildHuffmanTree(  HuffmanTree     &HT,    // out: Huffman tree
                        HuffmanCode     &HC,    // out: table of code strings
                        unsigned int    *w,     // weights of the symbols
                        unsigned int    n       // number of symbols to encode
)
{
    if (n <= 1) return;
    const unsigned int m = 2 * n - 1;   // number of nodes in the finished tree

    HC = 0;
    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));     // slot 0 is not a node
    if (!HT) return;

    // Initial state: every slot is parentless and childless with weight 0 ...
    for (unsigned int i = 0; i <= m; ++i) {
        HT[i].weight = 0;
        HT[i].parent = 0;
        HT[i].lChild = 0;
        HT[i].rChild = 0;
    }
    // ... slot 0 records the node count and the leaves receive their weights.
    HT[0].weight = m;
    for (unsigned int i = 1; i <= n; ++i) {
        HT[i].weight = w[i - 1];
    }

    // Create the internal nodes by repeatedly merging the two lightest
    // parentless nodes among HT[1..i-1].
    for (unsigned int i = n + 1; i <= m; ++i) {
        unsigned int s1 = 0, s2 = 0;
        // Select treats its bound as exclusive, so pass i (not i - 1);
        // otherwise node i-1 would never be compared by weight and the
        // resulting tree could be non-optimal.
        Select(HT, i, s1, s2);
        HT[s1].parent = HT[s2].parent = i;

        HT[i].lChild = s1;
        HT[i].rChild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    // Derive each symbol's code by walking from its leaf up to the root.
    HC = (HuffmanCode)malloc((n + 1) * sizeof(char *));     // slot 0 is not a string
    char *cd = (char *)malloc(n * sizeof(char));            // scratch: a code has at most n-1 digits
    bool ok = (HC != 0 && cd != 0);
    unsigned int built = 0;                                 // HC[1..built] are allocated

    if (ok) {
        HC[0] = (char *)(size_t)n;      // spare slot 0 remembers how many strings HC owns
        cd[n - 1] = '\0';
        for (unsigned int i = 1; i <= n; ++i) {
            unsigned int start = n - 1; // position of the terminator; digits are written before it
            for (unsigned int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
                cd[--start] = (HT[f].lChild == c) ? '0' : '1';
            }
            HC[i] = (char *)malloc((n - start) * sizeof(char));
            if (!HC[i]) {
                ok = false;
                break;
            }
            built = i;
            char *d = cd + start;
            strcpy(HC[i], d);
        }
    }
    free(cd);

    if (!ok) {
        // Roll back so the caller never sees a half-built result.
        if (HC) {
            for (unsigned int i = 1; i <= built; ++i) free(HC[i]);
        }
        free(HC);
        free(HT);
        HC = 0;
        HT = 0;
    }
}

/*
 * Select - pick the two lightest parentless nodes of a tree under construction.
 *
 * HT  tree array; a node whose parent field is 0 has not been merged yet.
 * i   exclusive upper bound of the indices that are compared by weight.
 * s1  out: index chosen in the first pass.
 * s2  out: index chosen in the second pass.
 *
 * Each pass starts from the first parentless node found by walking up from
 * index 1 and then replaces it with any parentless node j (start < j < i) of
 * strictly smaller weight, so ties go to the smaller index. The walk that
 * finds the starting node is not limited by i. When at least two parentless
 * nodes exist below i, the result satisfies HT[s1].weight <= HT[s2].weight.
 * The caller must guarantee that two parentless nodes exist in the array.
 *
 * The node chosen in the first pass is hidden from the second pass by
 * temporarily giving it a non-zero parent; both parent fields are 0 again on
 * return.
 */
void Select(HuffmanTree     &HT,    // tree to choose from
            unsigned int    i,      // upper bound of compared indices (exclusive)
            unsigned int    &s1,    // out: first choice
            unsigned int    &s2     // out: second choice
)
{
    unsigned int *picked[2] = { &s1, &s2 };

    for (int k = 0; k < 2; k++) {
        // Starting candidate: first node that has no parent yet.
        unsigned int min = 1;
        while (HT[min].parent != 0) ++min;

        for (unsigned int j = min + 1; j < i; j++) {
            if (HT[j].parent == 0 && HT[j].weight < HT[min].weight) {
                min = j;
            }
        }
        HT[min].parent = 1;     // any non-zero value hides the node from the next pass
        *picked[k] = min;
        // No further scan is made after the last pick: its result would be
        // unused and the walk could run past the last valid node.
    }

    HT[s1].parent = HT[s2].parent = 0;  // undo the temporary marks
}

// Copy the NUL-terminated string sou into des, terminator included.
// Neither pointer is modified. Returns the number of characters written,
// counting the terminator (so an empty string yields 1).
size_t strcpy(  char    *& des,     // destination buffer
                char    *& sou      // source string
)
{
    size_t copied = 0;

    for (;;) {
        const char ch = sou[copied];
        des[copied] = ch;
        ++copied;
        if (ch == '\0') break;
    }

    return copied;
}
/*
 * SearchTree - follow a string of '0'/'1' digits down the Huffman tree.
 *
 * HT  Huffman tree.
 * s   in/out: position in the digit string; advanced past every character
 *     that is consumed.
 * n   index of the node to start from.
 *
 * Returns the index of the first leaf reached (a node whose lChild and rChild
 * sum to 0); s then points just after the last digit used. Returns ERROR,
 * after printing a message to stderr, when the string ends ('\0' or '\n')
 * before a leaf is reached (s stays on that character) or when a character
 * other than '0'/'1' is met (s is left just after it).
 */
unsigned int SearchTree(    HuffmanTree &HT,    // Huffman tree
                            char        *&s,    // digit string cursor
                            unsigned int n      // starting node index
)
{
    for (;;) {
        if (HT[n].lChild + HT[n].rChild == 0) {
            return n;                           // reached a leaf
        }

        const char digit = *s;
        if (digit == '\0' || digit == '\n') {   // input ran out in the middle of a code
            fprintf(stderr, "\n输入的编码不完整,请检查输入是否正确。\n");
            return ERROR;
        }

        ++s;

        if (digit == '0') {
            n = HT[n].lChild;
        }
        else if (digit == '1') {
            n = HT[n].rChild;
        }
        else {
            fprintf(stderr, "\n发现非法编码值。\n");
            return ERROR;
        }
    }
}
/*
 * DestroyTree - release the memory obtained by BuildHuffmanTree.
 *
 * HC[0] holds the number of code strings (stored as a pointer value); the
 * strings HC[1..count], the HC array and the HT array are freed and both
 * references are reset to 0. A null HC or HT is accepted.
 */
void DestroyTree(HuffmanTree &HT, HuffmanCode &HC)
{
    if (HC) {
        // Go through size_t: casting a pointer straight to a narrower integer
        // type is rejected by some compilers on 64-bit targets.
        const unsigned int count = (unsigned int)(size_t)HC[0];
        for (unsigned int i = 1; i <= count; i++)
            free(HC[i]);
        free(HC);
        HC = 0;
    }

    free(HT);
    HT = 0;
}
//coder.h
#pragma once
#include <stdlib.h>

// Failure value returned by SearchTree. Tree slot 0 is not used as a node
// (BuildHuffmanTree numbers nodes from 1), so 0 does not collide with a node index.
#define ERROR (0)

// One node of the Huffman tree, stored in an array and linked by indices.
// An index of 0 means "none" (no parent / no child).
typedef struct {
    unsigned int    weight;     // weight of the node
    unsigned int    parent;     // index of the parent node
    unsigned int    lChild;     // index of the left child
    unsigned int    rChild;     // index of the right child
} HTNode, *HuffmanTree;

// Table of code strings: entries 1..n are '0'/'1' strings, entry 0 carries
// the entry count stored as a pointer value (see BuildHuffmanTree).
typedef char ** HuffmanCode;

// Build the tree HT and the code table HC for n symbols with weights w[0..n-1].
void BuildHuffmanTree(HuffmanTree &HT, HuffmanCode &HC, unsigned int * w, unsigned int n);
// Choose two parentless nodes of smallest weight, comparing indices below i.
void Select(HuffmanTree & HT, unsigned int i, unsigned int &s1, unsigned int &s2);
// Copy sou into des; returns the number of characters written, terminator included.
size_t strcpy(char* &des, char* &sou);
// Decode one symbol starting at node n; advances s; returns the leaf index or ERROR.
unsigned int SearchTree(HuffmanTree &HT, char *&s, unsigned int n);
// Free everything allocated by BuildHuffmanTree and reset both references to 0.
void DestroyTree(HuffmanTree &HT, HuffmanCode &HC);
// Huffman compress.cpp: defines the entry point of the console application.
//

#include <stdlib.h>
#include <stdio.h>
#include "Coder.h"


int main()
{
    const int bufferSize = 1000;
    unsigned int maxSize = 100;
    unsigned int    n = 0, n1 = 0,              //待编码的字符数量
        *d = 0;         //字符的权值数组
    int             weight;                     //权值
    char            *data = 0,      //待编码字符数组
        buf[bufferSize];            //控制台读入字符串缓冲区



    d = (unsigned int *)malloc(sizeof(unsigned int) * maxSize);
    data = (char *)malloc(sizeof(char) * maxSize);

    system("title 哈夫曼编码译码器演示程序");
    printf("\n\t\t哈夫曼编码译码器\n本程序仅支持对单个字符的编码,且字符数量限制为100个以内.\n\n"
        "请输入字符及其对应的正整数权值,单独一个回车行结束整个输入。\n例如:\nA 2\nB 3 C 4 d 5\ne 6 \n\n--------------------\n");

    //从控制台窗口接受字符及其对应的权值
    for (;;) {
        char c;
        char s[2];
        int match_len;      //匹配的个数

        putc('>', stdout);
        fgets(buf, 20, stdin);
        if ('\n' == buf[0])
            if (n <= 1) {
                printf("请至少输入两组数据。\n");
                continue;
            }
            else
                break;

        unsigned int in = 0, ind;
        while ((match_len = sscanf(buf + in, "%1s %d%n", &s, &weight, &ind)) == 2 && s[0] > ' ' && weight > 0) {
            c = s[0];
            in += ind;

            int i = 0;
            while (data[i] != c && data[i]) i++;
            if (data[i]) {
                fprintf(stdout, "\n一个字符仅可有一个权值!\n");
                buf[in] = 'Z';
                break;
            }
            d[n] = (unsigned int)weight;

            data[n] = c;
            if (++n >= maxSize) {
                maxSize += 100;
                void * p = 0, *q = 0;
                p = realloc(d, maxSize);
                q = realloc(data, maxSize);
                if (!p && !q) {
                    d = (unsigned int *)p;
                    data = (char *)q;
                }
                else {

                    fprintf(stdout, "\n输入达上限!\n");
                    system("pause");
                    return 0;
                }
            }

        }
        unsigned int ii = in;
        while (buf[ii] <= 32 && buf[ii] > 0)ii++;

        if (ii == 0 || buf[ii] != '\0') { n = n1; printf("%s", "输入错误,请重新输入。\n"); continue; }
        n1 = n;
    }


    HuffmanTree HT;
    HuffmanCode HC;
    //任务一  构造哈夫曼树
    BuildHuffmanTree(HT, HC, d, n);

    //任务二  输出哈夫曼编码
    printf("\n%s\t%s\n", "字符", "哈夫曼编码");
    for (unsigned int i = 1; i <= n; i++) {
        printf("%c\t%s\n", data[i - 1], HC[i]);
    }
    putc('\n', stdout);

    //任务三  翻译哈夫曼编码
    //char  buf2[1001];
    char    *p = buf;
    char    *q = buf;

    unsigned int        result = 1;

    for (;;) {
        printf("\n请输入待译码的字符串:\n>");
        fgets(buf, bufferSize, stdin);

        if ('\n' == buf[0]){
            char c[22];
            printf("确定要退出吗?(y/n)");
            fgets(c, 20, stdin);
            if(c[0] == 'y')
                break;
            c[0] = '\0';
            continue;
        }
        //putc('>', stdout);
        while (p - buf < bufferSize + 1) {
            result = SearchTree(HT, p, 2 * n - 1);
            if (result != ERROR) {
                *q++ = data[result - 1];
                //printf("%c", data[result - 1]);
                /*if (*p == '\n') {
                    putc('\n\n', stdout);
                    break;
                }
                */
                if (*p == '\n') {
                    *q = '\0';
                    printf("原字符串为:\n%s\n\n", buf);
                    break;
                }
            }
            else break;
        }

        p = q = buf;
    }

    DestroyTree(HT, HC);

    system("pause");
    return 0;
}

References:

[1] Yan Weimin, Wu Weimin. Data Structures. Beijing: Tsinghua University Press, 2008
[2] Su Shihua. Data Structures Course Design. Beijing: Mechanical Industry Press, 2010
[3] Brian W. Kernighan, Dennis M. Ritchie; translated by Xu Baowen and Li Zhi. The C Programming Language. Beijing: Mechanical Industry Press, 2004
[4] https://en.wikipedia.org/wiki/Huffman_coding

Guess you like

Origin www.cnblogs.com/imagineAct/p/11258646.html