C++项目 - 文件压缩

文件压缩


开发环境:Windows Visual Studio 2013

项目概述:利用Huffman编码可对任意文件(包含图片、视频、音频)进行压缩和解压缩。

使用技术:运用到的数据结构:Heap堆、Huffmantree哈夫曼树、Huffmancode哈夫曼编码

项目思想:压缩文件时利用小堆建立哈夫曼树,依据建立的哈夫曼树产生哈夫曼编码。利用哈夫曼编码对文件进行压缩,产生压缩文件和配置文件。


那么何为哈夫曼树?

  • Huffman树,又称为最优二叉树,是加权路径长度最短的二叉树。

【贪心算法】

  • 是指在问题求解时,总是做出当前看起来最好的选择。也就是说,贪心算法做出的不是整体最优的选择,而是某种意义上的局部最优解。贪心算法并不是对所有的问题都能得到整体最优解。

使用贪心算法构建Huffman树:
这里写图片描述


利用哈夫曼树生成哈夫曼编码:
这里写图片描述

  • 源代码如下:
    huffmantree.h
#pragma once  

#include "heap.h"  
#include<assert.h>  


/**
 * One node of a Huffman tree.
 *
 * Leaves carry the weight of a single input symbol; internal nodes carry the
 * sum of their children's weights. The node keeps a back-pointer to its
 * parent so a leaf can be walked up to the root.
 */
template<class T>
struct HuffmanTreeNode
{
    HuffmanTreeNode<T>* _left;    // left child, NULL for a leaf
    HuffmanTreeNode<T>* _right;   // right child, NULL for a leaf
    HuffmanTreeNode<T>* _parent;  // parent node, NULL for the root
    T _weight;                    // weight stored in this node

    /** Builds a detached node (no children, no parent) holding a copy of x. */
    HuffmanTreeNode(const T& x)
        : _left(NULL),
          _right(NULL),
          _parent(NULL),
          _weight(x)
    {
    }
};

/**
 * Huffman tree built greedily from an array of weights with a min-heap.
 *
 * The tree owns every node it allocates and frees them in its destructor.
 * Copying is disabled because a shallow copy would free the nodes twice.
 */
template<class T>
class HuffmanTree
{
    typedef HuffmanTreeNode<T> Node;

public:

    HuffmanTree()
        :_root(NULL)
    {}

    ~HuffmanTree()
    {
        Destory(_root);
        _root = NULL;
    }

    /**
     * Orders node pointers by ascending weight so the heap behaves as a
     * min-heap. The template parameter is unused; it is kept (under a name
     * that no longer shadows the class parameter T) so that the spelling
     * NodeCompare<T> remains valid.
     */
    template <class U>
    struct NodeCompare
    {
        bool operator()(Node *l, Node *r) const
        {
            return l->_weight < r->_weight;
        }
    };

public:
    /**
     * Builds the tree from a[0..size).
     *
     * @param a       array of weights; may be NULL only when size is 0
     * @param size    number of elements in a
     * @param invalid elements equal to this value are skipped
     *
     * Any previously built tree is released first. If no element differs
     * from invalid the tree is left empty (GetRootNode() returns NULL).
     */
    void CreatTree(const T* a, size_t size, const T& invalid)
    {
        assert(a || size == 0);

        // Release the old tree so that a second call does not leak it.
        Destory(_root);
        _root = NULL;

        Heap<Node*, NodeCompare<T> > minHeap;
        for (size_t i = 0; i < size; ++i)
        {
            if (a[i] != invalid)
            {
                minHeap.Push(new Node(a[i]));
            }
        }

        // Nothing to build: calling Top() on an empty heap would be invalid.
        if (minHeap.Empty())
        {
            return;
        }

        // Repeatedly merge the two lightest subtrees until one remains.
        while (minHeap.Size() > 1)
        {
            Node* left = minHeap.Top();
            minHeap.Pop();
            Node* right = minHeap.Top();
            minHeap.Pop();

            Node* parent = new Node(left->_weight + right->_weight);
            parent->_left = left;
            parent->_right = right;
            left->_parent = parent;
            right->_parent = parent;

            minHeap.Push(parent);
        }

        _root = minHeap.Top();
    }

    /** Returns the root of the tree, or NULL when the tree is empty. */
    Node* GetRootNode()
    {
        return _root;
    }

    /**
     * Recursively deletes the subtree rooted at root (post-order).
     * The caller is responsible for clearing its own pointer afterwards.
     */
    void Destory(Node* root)
    {
        if (root == NULL)
        {
            return;
        }
        Destory(root->_left);
        Destory(root->_right);
        delete root;
    }

private:
    // Declared but not defined: the tree owns raw nodes, so copying is disabled.
    HuffmanTree(const HuffmanTree&);
    HuffmanTree& operator=(const HuffmanTree&);

private:
    HuffmanTreeNode<T>* _root;
};

void TestHuffmanTree()
{
    int a[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    HuffmanTree<int> hf;
    hf.CreatTree(a, 10, -1);
}

heap.h

#pragma once  
#include <iostream>
#include <vector>  
#include<assert.h> 
using namespace std;

// Comparator for a min-heap: true when l orders before r via operator<.
// operator() is const so the comparator can also be used through a const
// object or reference.
template<class T>
struct Less
{
    bool operator() (const T& l, const T& r) const
    {
        return l < r; // operator<
    }
};

// Comparator for a max-heap: true when l orders after r via operator>.
// operator() is const so the comparator can also be used through a const
// object or reference.
template<class T>
struct Greater
{
    bool operator() (const T& l, const T& r) const
    {
        return l > r; // operator>
    }
};

/**
 * Binary heap stored in a vector.
 *
 * Compare decides the ordering: com(x, y) == true means x must be closer to
 * the top than y. With the default Less<T> this is a min-heap; with
 * Greater<T> it is a max-heap. Compare must be default-constructible.
 */
template<class T, class Compare = Less<T> >
class Heap
{
public:
    /** Creates an empty heap. */
    Heap()
    {}

    /**
     * Builds a heap from a[0..size).
     * a may be NULL only when size is 0.
     */
    Heap(const T* a, size_t size)
    {
        assert(a != NULL || size == 0);
        if (size > 0)
        {
            _arrays.assign(a, a + size);
        }

        // Heapify: sift down every internal node, last parent first.
        // Counting i from size/2 down to 1 avoids the unsigned wrap-around of
        // (size - 2) / 2 when size is 0 or 1.
        for (size_t i = size / 2; i > 0; --i)
        {
            AdjustDown(static_cast<int>(i - 1));
        }
    }

    /** Inserts x and restores the heap property. */
    void Push(const T& x)
    {
        _arrays.push_back(x);
        AdjustUp(static_cast<int>(_arrays.size() - 1));
    }

    /** Removes the top element. Precondition: the heap is not empty. */
    void Pop()
    {
        assert(!_arrays.empty());
        using std::swap;
        // Move the top to the end, drop it, then repair from the root.
        swap(_arrays.front(), _arrays.back());
        _arrays.pop_back();

        AdjustDown(0);
    }

    /** Returns the top element. Precondition: the heap is not empty. */
    T& Top()
    {
        assert(!_arrays.empty());
        return _arrays[0];
    }

    /** Read-only access to the top element. Precondition: the heap is not empty. */
    const T& Top() const
    {
        assert(!_arrays.empty());
        return _arrays[0];
    }

    bool Empty() const
    {
        return _arrays.empty();
    }

    int Size() const
    {
        return static_cast<int>(_arrays.size());
    }

    /**
     * Sifts the element at index root down until neither child has to be
     * above it. Indices outside the array (including negatives) are ignored.
     */
    void AdjustDown(int root)
    {
        if (root < 0)
        {
            return;
        }

        using std::swap;
        Compare com;   // non-const on purpose: user comparators may have a non-const operator()
        const size_t count = _arrays.size();
        size_t parent = static_cast<size_t>(root);
        size_t child = parent * 2 + 1;

        while (child < count)
        {
            // Pick whichever child must be closer to the top.
            if (child + 1 < count && com(_arrays[child + 1], _arrays[child]))
            {
                ++child;
            }

            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }

            swap(_arrays[child], _arrays[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    /**
     * Sifts the element at index child up while it has to be above its
     * parent. Indices outside the array are ignored.
     */
    void AdjustUp(int child)
    {
        if (child <= 0 || static_cast<size_t>(child) >= _arrays.size())
        {
            return;
        }

        using std::swap;
        Compare com;
        size_t cur = static_cast<size_t>(child);

        while (cur > 0)
        {
            const size_t parent = (cur - 1) / 2;
            if (!com(_arrays[cur], _arrays[parent]))
            {
                break;
            }

            swap(_arrays[parent], _arrays[cur]);
            cur = parent;
        }
    }

    /** Prints the underlying array in storage order, space separated, followed by a newline. */
    void Print()
    {
        for (size_t i = 0; i < _arrays.size(); ++i)
        {
            std::cout << _arrays[i] << " ";
        }
        std::cout << std::endl;
    }

public:
    std::vector<T> _arrays;   // heap storage; index 0 is the top
};

/**
 * Minimal priority queue adapter over Heap<T> (default comparator, so the
 * smallest element is served first).
 *
 * Besides Push/Pop it forwards Top/Empty/Size so that queued elements can
 * actually be read back.
 */
template<class T>
class PriorityQueue
{
public:
    /** Adds x to the queue. */
    void Push(const T& x)
    {
        _hp.Push(x);
    }

    /** Removes the element with the highest priority. The queue must not be empty. */
    void Pop()
    {
        _hp.Pop();
    }

    /** Returns the element with the highest priority. The queue must not be empty. */
    T& Top()
    {
        return _hp.Top();
    }

    /** True when the queue holds no elements. */
    bool Empty()
    {
        return _hp.Empty();
    }

    /** Number of queued elements. */
    int Size()
    {
        return _hp.Size();
    }

public:
    Heap<T> _hp;   // underlying heap
};

void Test1()
{
    int a[10] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 };
    Heap<int, Greater<int> > hp1(a, 10);
    hp1.Push(1);
    hp1.Print();

    Heap<int> hp2(a, 10);
    hp2.Push(1);
    hp2.Print();


    //Less<int> less;  
    //cout<<less(1, 2)<<endl;  

    //Greater<int> greater;  
    //cout<<greater(1, 2)<<endl;  
}

#include <list>  

/**
 * Iterator demo: fills a vector and a list with 1..4 and prints each
 * container on its own line by walking the half-open range [begin, end).
 */
void Test2()
{
    std::vector<int> numbers;
    for (int value = 1; value <= 4; ++value)
    {
        numbers.push_back(value);
    }

    // [begin, end)
    for (std::vector<int>::iterator pos = numbers.begin(); pos != numbers.end(); ++pos)
    {
        std::cout << *pos << " ";
    }
    std::cout << std::endl;

    std::list<int> chain;
    for (int value = 1; value <= 4; ++value)
    {
        chain.push_back(value);
    }

    for (std::list<int>::iterator pos = chain.begin(); pos != chain.end(); ++pos)
    {
        std::cout << *pos << " ";
    }
    std::cout << std::endl;
}

/**
 * Max-heap sift-down on a raw int array.
 *
 * Moves the element at index root down within a[0..size) until it is not
 * smaller than either of its children.
 */
void AdjustDown(int* a, size_t size, int root)
{
    int parent = root;
    for (int child = parent * 2 + 1;
         static_cast<size_t>(child) < size;
         child = parent * 2 + 1)
    {
        // Choose the larger of the two children.
        const int sibling = child + 1;
        if (static_cast<size_t>(sibling) < size && a[sibling] > a[child])
        {
            child = sibling;
        }

        // Heap property holds here, so it holds for the whole subtree.
        if (!(a[child] > a[parent]))
        {
            return;
        }

        std::swap(a[child], a[parent]);
        parent = child;
    }
}


/**
 * Sorts a[0..size) in ascending order with heap sort.
 *
 * A max-heap is built in place, then the current maximum is repeatedly
 * swapped to the end of the shrinking unsorted prefix.
 * A NULL array or fewer than two elements is a no-op.
 */
void HeapSort(int* a, size_t size)
{
    // Nothing to sort; this also keeps the index arithmetic below from
    // wrapping around in unsigned arithmetic for size 0 or 1.
    if (a == NULL || size < 2)
    {
        return;
    }

    // Build the max-heap: sift down every internal node, last parent first.
    for (size_t i = size / 2; i > 0; --i)
    {
        AdjustDown(a, size, static_cast<int>(i - 1));
    }

    // Move the maximum behind the unsorted prefix and re-heapify the prefix.
    for (size_t end = size - 1; end > 0; --end)
    {
        std::swap(a[0], a[end]);
        AdjustDown(a, end, 0);
    }
}

/** Smoke test: heap-sorts a fixed permutation of 0..9 in place. */
void TestHeapSort()
{
    int samples[10] = { 5, 9, 2, 3, 0, 1, 7, 8, 4, 6 };
    const size_t sampleCount = sizeof(samples) / sizeof(samples[0]);
    HeapSort(samples, sampleCount);
}

file_compress.h

#pragma once  

#include "huffmantree.h"  
#include <stdio.h>
#include <stdlib.h>
#include<algorithm>  
#include <string>
#include<windows.h>  

typedef long long LongType;

/**
 * Per-byte statistics used as the weight type of the Huffman tree.
 *
 * All comparisons and the sum look only at _count; _ch and _code are
 * payload. The operators are const-correct so they also work on const
 * objects and temporaries.
 */
struct FileInfo
{
    unsigned char _ch;      // byte value this entry describes
    LongType      _count;   // number of occurrences of _ch
    std::string   _code;    // Huffman code as a string of '0' / '1'

    FileInfo(unsigned char ch = 0)
        :_ch(ch)
        , _count(0)
    {}

    /** Weight of a merged node: the sum of both counts (_ch is 0, _code is empty). */
    FileInfo operator+(const FileInfo& fi) const
    {
        FileInfo tmp;
        tmp._count = this->_count + fi._count;
        return tmp;
    }

    /** Orders entries by occurrence count. */
    bool operator < (const FileInfo& fi) const
    {
        return this->_count < fi._count;
    }

    /** Two entries differ when their occurrence counts differ. */
    bool operator != (const FileInfo& fi) const
    {
        return this->_count != fi._count;
    }

};

template<class T>
class FileCompress
{
public:
    FileCompress()
    {
        for (int i = 0; i < 256; ++i)
        {
            _infos[i]._ch = i;
        }
    }

public:

    bool Compress(const char* filename)
    {
        //1.打开文件,统计文件字符出现的次数  
        long long Charcount = 0;
        assert(filename);
        FILE* fOut = fopen(filename, "rb");
        assert(fOut);

        char ch = fgetc(fOut);

        while (!feof(fOut))
        {
            _infos[(unsigned char)ch]._count++;
            ch = fgetc(fOut);
            Charcount++;
        }

        //2.生成对应的huffman编码  
        GenerateHuffmanCode();

        //3.压缩文件  
        string compressFile = filename;
        compressFile += ".compress";
        FILE* fwCompress = fopen(compressFile.c_str(), "wb");
        assert(fwCompress);

        fseek(fOut, 0, SEEK_SET);
        ch = fgetc(fOut);
        char inch = 0;
        int index = 0;
        while (!feof(fOut))
        {
            string& code = _infos[(unsigned char)ch]._code;
            for (size_t i = 0; i < code.size(); ++i)
            {
                inch = inch << 1;
                if (code[i] == '1')
                {
                    inch |= 1;
                }
                if (++index == 8)
                {
                    fputc(inch, fwCompress);
                    inch = 0;
                    index = 0;
                }
            }
            ch = fgetc(fOut);
        }

        if (index)
        {
            inch = inch << (8 - index);
            fputc(inch, fwCompress);
        }

        //4.配置文件,方便后续的解压缩  
        string configFile = filename;
        configFile += ".config";
        FILE *fconfig = fopen(configFile.c_str(), "wb");
        assert(fconfig);

        char CountStr[128];
        _itoa(Charcount >> 32, CountStr, 10);
        fputs(CountStr, fconfig);
        fputc('\n', fconfig);
        _itoa(Charcount & 0xffffffff, CountStr, 10);
        fputs(CountStr, fconfig);
        fputc('\n', fconfig);

        FileInfo invalid;
        for (int i = 0; i < 256; i++)
        {
            if (_infos[i] != invalid)
            {
                fputc(_infos[i]._ch, fconfig);
                fputc(',', fconfig);
                fputc(_infos[i]._count + '0', fconfig);
                fputc('\n', fconfig);
            }
        }

        fclose(fOut);
        fclose(fwCompress);
        fclose(fconfig);

        return true;
    }

    bool UnCompresss(const char* filename)
    {
        string configfile = filename;
        configfile += ".config";
        FILE* outConfig = fopen(configfile.c_str(), "rb");
        assert(outConfig);
        char ch;
        long long Charcount = 0;
        string line = ReadLine(outConfig);
        Charcount = atoi(line.c_str());
        Charcount <<= 32;
        line.clear();
        line = ReadLine(outConfig);
        Charcount += atoi(line.c_str());
        line.clear();

        while (feof(outConfig))
        {
            line = ReadLine(outConfig);
            if (!line.empty())
            {
                ch = line[0];
                _infos[(unsigned char)ch]._count = atoi(line.substr(2).c_str());
                line.clear();
            }
            else
            {
                line = '\n';
            }
        }

        HuffmanTree<FileInfo> ht;
        FileInfo invalid;
        ht.CreatTree(_infos, 256, invalid);

        HuffmanTreeNode<FileInfo>* root = ht.GetRootNode();

        string  UnCompressFile = filename;
        UnCompressFile += ".uncompress";
        FILE* fOut = fopen(UnCompressFile.c_str(), "wb");

        string CompressFile = filename;
        CompressFile += ".compress";
        FILE* fIn = fopen(CompressFile.c_str(), "rb");

        int pos = 8;
        HuffmanTreeNode<FileInfo>* cur = root;
        ch = fgetc(fIn);

        while ((unsigned char)ch != EOF)
        {
            --pos;
            if ((unsigned char)ch &(1 << pos))
            {
                cur = cur->_right;
            }
            else
            {
                cur = cur->_left;
            }
            if (cur->_left == NULL && cur->_right == NULL)
            {
                fputc(cur->_weight._ch, fOut);
                cur = root;
                Charcount--;
            }
            if (pos == 0)
            {
                ch = fgetc(fIn);
                pos = 8;
            }
            if (Charcount == 0)
            {
                break;
            }
        }

        fclose(outConfig);
        fclose(fIn);
        fclose(fOut);
        return true;
    }

protected:

    string ReadLine(FILE* fConfig)
    {
        char ch = fgetc(fConfig);
        if (ch == EOF)
        {
            return "";
        }
        string line;
        while (ch != '\n' && ch != EOF)
        {
            line += ch;
            ch = fgetc(fConfig);
        }
        return line;
    }

    void GenerateHuffmanCode()
    {
        HuffmanTree<FileInfo> hft;
        FileInfo invalid;
        hft.CreatTree(_infos, 256, invalid);
        _GenerateHuffmanCode(hft.GetRootNode());
    }

    void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root)
    {
        if (root == NULL)
        {
            return;
        }

        _GenerateHuffmanCode(root->_left);
        _GenerateHuffmanCode(root->_right);

        if (root->_left == NULL && root->_right == NULL)
        {
            HuffmanTreeNode<FileInfo>* cur = root;
            HuffmanTreeNode<FileInfo>* parent = cur->_parent;
            string& code = _infos[cur->_weight._ch]._code;

            while (parent)
            {
                if (parent->_left == cur)
                {
                    code += '0';
                }
                else if (parent->_right == cur)
                {
                    code += '1';
                }
                cur = parent;
                parent = cur->_parent;
            }

            reverse(code.begin(), code.end());
        }
    }

private:
    FileInfo _infos[256];
};

void TestFileCompress()
{

    FileCompress<FileInfo> fc;

    int begin1 = GetTickCount();
    fc.Compress("QQ20171029-144442-HD.mp4");
    int end1 = GetTickCount();
    cout << end1 - begin1 << endl;

    int begin2 = GetTickCount();
    fc.UnCompresss("QQ20171029-144442-HD.mp4");//此地址为需要压缩文件所放的地址
    int end2 = GetTickCount();
    cout << end2 - begin2 << endl;

}

test.cpp

#define _CRT_SECURE_NO_WARNINGS  

#include <iostream>  
using namespace std;

//#include "HuffmanTree.h"  
#include "file_compress.h"  

// Entry point: runs the file compression round-trip test.
int main()
{
    // TestHuffmanTree();   // disabled: Huffman tree smoke test

    TestFileCompress();

    return 0;
}

猜你喜欢

转载自blog.csdn.net/ZAhqc_IT/article/details/80067536