通过哈夫曼编码压缩文件

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_30986521/article/details/83012451

原理是:统计待压缩文件中各字符的出现频率,构建哈夫曼树并求出每个字符的哈夫曼编码,然后将字符频率表(解压时用它重建哈夫曼树)和编码后的数据写入文件,完成压缩。

压缩代码:

// Counts how often each byte value occurs in a file.
//
// filename  : path of the file to scan.
// frequency : output table of 256 counters, indexed by byte value. It is
//             always reset to zero first, so it is all-zero when the file
//             cannot be opened or is empty.
void get_frequency(string filename, int frequency[256])
{
    // Reset before the open check so callers never see uninitialised counters.
    memset(frequency, 0, sizeof(int) * 256);

    // Binary mode: every byte is counted as stored, with no newline translation.
    ifstream fin(filename, ios::binary);
    if (!fin.is_open())
    {
        return;
    }

    // get(char&) fails cleanly at end of file, so no separate eof() probe is needed.
    char byte;
    while (fin.get(byte))
    {
        frequency[(unsigned char)byte]++;
    }
    // fin is closed by its destructor.
}
// One node of the Huffman tree.
struct node
{
    unsigned char ch;  // byte value carried by the node
    int w;             // weight of the node
    node* rch;         // right child, NULL when absent
    node* lch;         // left child, NULL when absent
};
// Allocates a tree node with malloc and fills in every field.
//
// ch  : byte value to store in the node.
// w   : weight to store in the node.
// lch : left child (defaults to NULL).
// rch : right child (defaults to NULL).
//
// Returns the new node; because it comes from malloc it must be released
// with free().
node* new_node(unsigned char ch, int w, node* lch = NULL, node* rch = NULL)
{
    node* created = static_cast<node*>(malloc(sizeof(node)));
    created->lch = lch;
    created->rch = rch;
    created->w = w;
    created->ch = ch;
    return created;
}
//优先级队列比较大小的方法
struct cmp
{
    bool operator () (node* x, node* y)
    {
        return x->w > y->w;
    }
};
// Builds the Huffman tree for a byte-frequency table and returns its root.
//
// frequency : 256 counters indexed by byte value; entries equal to 0 are
//             skipped and get no leaf.
//
// Returns NULL when every counter is 0 (nothing to encode). Otherwise returns
// the root of a tree whose nodes were allocated by new_node.
node* build_haffman(int frequency[256])
{
    priority_queue<node*, vector<node*>, cmp> q;
    for (int i = 0; i < 256; i++)
    {
        if (frequency[i] != 0)
        {
            q.push(new_node((unsigned char)i, frequency[i]));
        }
    }

    // No symbols at all: calling top() on an empty queue is undefined behaviour.
    if (q.empty())
    {
        return NULL;
    }

    // Exactly one distinct symbol: a lone leaf would have a zero-length code,
    // so nothing could be encoded. Hang the leaf under a parent as its left
    // child, which gives the symbol a one-bit code.
    if (q.size() == 1)
    {
        node* only = q.top();
        return new_node(0, only->w, only, NULL);
    }

    // Repeatedly merge the two lightest nodes until a single root remains.
    while (q.size() > 1)
    {
        node* x = q.top();
        q.pop();
        node* y = q.top();
        q.pop();

        q.push(new_node(0, x->w + y->w, x, y));
    }
    return q.top();
}
// Frees a whole tree with a post-order walk (children first, then the node)
// and resets the caller's pointer.
//
// root : address of the pointer to the subtree root. A NULL address or a NULL
//        subtree is a no-op. On return *root is NULL, so the caller is not
//        left holding a dangling pointer.
void destory_haffman(node **root)
{
    if (root == NULL || *root == NULL)
    {
        return;
    }

    destory_haffman(&(*root)->lch);
    destory_haffman(&(*root)->rch);
    free(*root);
    *root = NULL;
}
// Walks the tree and records the code of every leaf.
//
// root : subtree to walk; NULL is a no-op.
// v    : path from the tree root to `root` as '0'/'1' characters ('0' for a
//        left edge, '1' for a right edge). It is modified during the walk
//        and restored before returning.
// code : output table indexed by byte value; the entry for each leaf's byte
//        is set to the path leading to that leaf.
void get_haffman_code(node* root, vector<char>& v, string code[256])
{
    if (root == NULL)
    {
        return;
    }

    const bool is_leaf = (root->lch == NULL) && (root->rch == NULL);
    if (is_leaf)
    {
        code[root->ch] = string(v.begin(), v.end());
    }

    // Descend left with a trailing '0', then flip that same slot to '1' for
    // the right subtree, and finally drop it to restore the caller's path.
    v.push_back('0');
    get_haffman_code(root->lch, v, code);
    v.back() = '1';
    get_haffman_code(root->rch, v, code);
    v.pop_back();
}
// Packs eight characters of a '0'/'1' string into one byte, most significant
// bit first.
//
// haff_code : bit string; taken by const reference so that it is not copied
//             on every call.
// index     : position of the first (most significant) bit.
//
// Only the character '1' sets a bit. Positions outside the string count as
// '0', so a short tail is zero-padded instead of being read out of bounds.
unsigned char create_uchar(const string& haff_code, int index)
{
    const int total = static_cast<int>(haff_code.length());
    unsigned char ch = 0;
    for (int bit = 0; bit < 8; bit++)
    {
        ch = static_cast<unsigned char>(ch << 1);
        const int pos = index + bit;
        if (pos >= 0 && pos < total && haff_code[pos] == '1')
        {
            ch |= 1;
        }
    }
    return ch;
}
//压缩文件的流程
void compress_to_file(string src_file, string dst_file)
{
    ifstream fin(src_file);
    ofstream fout(dst_file, ios::binary);
    
    if (!fin.is_open() || !fout.is_open())
    {
        return;
    }
    
    int frequency[256];
    string code[256];
    vector<char> v;
    get_frequency("/Users/Rubik/Desktop/123.txt", frequency);
    node* root = build_haffman(frequency);
    get_haffman_code(root, v, code);
    
    string haff_code = "";
    unsigned char ch;
    while (!fin.eof())
    {
        ch = fin.get();
        if (fin.eof()) break;
        haff_code += code[ch];
    }
    int len = (int)haff_code.length();
    cout << len << endl;
    fout.write((const char*)frequency, sizeof(int) * 256);
    fout.write((const char*)&len, sizeof(int));
    
    while (haff_code.length() % 8 != 0)
    {
        haff_code += '0';
    }
    
    for (int i = 0; i < haff_code.length(); i += 8)
    {
        unsigned char temp = create_uchar(haff_code, i);
        fout.write((const char*)&temp, sizeof(char));
    }
    
    fout.close();
    fin.close();
    destory_haffman(&root);
}

解压部分比较简单:先从压缩文件中读出字符频率表并重建哈夫曼树,然后逐个读取 unsigned char,按位遍历哈夫曼树,每到达一个叶子节点就把对应的字符输出到解压文件。

// Decodes the bits of one byte by walking the Huffman tree.
//
// root : tree root; the walk restarts here after each decoded symbol.
// pos  : node where the walk resumes (carried over from the previous byte).
// temp : the byte to decode, consumed from the most significant bit down.
// s    : output buffer for the symbols decoded from this byte.
// slen : set to the number of symbols written to s.
// cnt  : running count of bits consumed so far; incremented per bit.
// len  : total number of meaningful bits; decoding stops once cnt reaches it.
//
// Returns the node where the walk stopped, to be passed back as `pos` for the
// next byte.
node* get_res(node* root, node* pos, unsigned char temp, char* s, int &slen, int &cnt, int len)
{
    slen = 0;

    int mask = 128;
    while (mask > 0 && cnt < len)
    {
        // A set bit goes right, a clear bit goes left.
        pos = (temp & mask) ? pos->rch : pos->lch;
        ++cnt;

        const bool is_leaf = (pos->lch == NULL) && (pos->rch == NULL);
        if (is_leaf)
        {
            s[slen++] = pos->ch;
            pos = root;
        }

        mask >>= 1;
    }

    return pos;
}

// Restores a file written by compress_to_file.
//
// Reads the 256-entry int frequency table and the int bit count from
// src_file, rebuilds the Huffman tree from the table, then decodes the packed
// bits and writes the resulting bytes to dst_file.
//
// Returns without creating dst_file when src_file cannot be opened or its
// header is incomplete. The code table is printed to standard output.
void decompress_to_file(string src_file, string dst_file)
{
    // The compressed stream is raw binary data, so read it in binary mode.
    ifstream fin(src_file, ios::binary);
    if (!fin.is_open())
    {
        return;
    }

    int frequency[256];
    int len = 0;
    fin.read((char*)frequency, sizeof(int) * 256);
    fin.read((char*)&len, sizeof(int));
    if (!fin)
    {
        // Truncated header: the table or the bit count is incomplete.
        return;
    }

    ofstream fout(dst_file, ios::binary);
    if (!fout.is_open())
    {
        return;
    }

    // An all-zero table means the original file was empty; there is no tree.
    bool has_symbols = false;
    for (int i = 0; i < 256; i++)
    {
        if (frequency[i] != 0)
        {
            has_symbols = true;
            break;
        }
    }
    node* root = has_symbols ? build_haffman(frequency) : NULL;

    // Diagnostic dump of the code table.
    vector<char> v;
    string code[256];
    get_haffman_code(root, v, code);
    for (int i = 0; i < 256; i++)
    {
        if (code[i].length() > 0)
        {
            cout << code[i] << endl;
        }
    }

    node* pos = root;
    char s[8];  // one input byte holds 8 bits, so it yields at most 8 symbols
    int slen = 0;
    int cnt = 0;
    char raw;
    // Stop once all meaningful bits are consumed or the input runs out; a
    // failed read is never decoded.
    while (root != NULL && cnt < len && fin.get(raw))
    {
        pos = get_res(root, pos, (unsigned char)raw, s, slen, cnt, len);
        fout.write(s, slen);
    }

    destory_haffman(&root);
    // fin and fout are closed by their destructors.
}
// Demo driver: compresses a fixed input file, then decompresses the result
// into a third file.
int main()
{
    const string original = "/Users/Rubik/Desktop/123.txt";
    const string packed = "/Users/Rubik/Desktop/out.txt";
    const string restored = "/Users/Rubik/Desktop/456.txt";

    compress_to_file(original, packed);
    decompress_to_file(packed, restored);
    return 0;
}

效果如下

猜你喜欢

转载自blog.csdn.net/qq_30986521/article/details/83012451