基本思想:
压缩:
1、统计出文件中相同字符出现的次数
2、获取哈夫曼编码
次数作为权值构建哈夫曼树
3、重新编码,写回压缩文件
保存头文件:
源文件后缀
编码信息的行数
每个字符的权
保存编码
解压缩:
1、获取原文件后缀
2、获取每个字符出现的次数,即权值
3、利用之前获得的权值,还原哈夫曼树
4、找到对应的叶子节点,将信息保存到解压文件中
在写压缩文件之前,首先需要实现堆和哈夫曼树
1,建堆
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <utility>
#include <vector>
using namespace std;
// Comparison functors. The heap is parameterised on one of these so the same
// sift code can maintain either ordering.

/// Returns true when `l < r`. A Heap using it keeps the smallest element on top.
template<class T>
struct Small
{
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

/// Returns true when `l > r`. A Heap using it keeps the largest element on top.
template<class T>
struct Large
{
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};

/// Binary heap stored in a std::vector.
///
/// `Compare()(a, b)` must return true when `a` has to sit closer to the top
/// than `b`. With the default comparator, Large<T>, this is a MAX-heap; pass
/// Small<T> (or any "less" functor) to get a min-heap.
template<class T, class Compare = Large<T>>
class Heap
{
public:
    /// Creates an empty heap.
    Heap()
    {}

    /// Builds a heap from the first `size` elements of `a`.
    /// `a` must not be null; a non-positive `size` yields an empty heap.
    Heap(const T* a, int size)
    {
        assert(a);
        if (size <= 0)
            return;
        _a.assign(a, a + size);
        // Sift down every internal node, starting from the last one.
        for (int j = (size - 2) / 2; j >= 0; --j)
        {
            adjust_down(j);
        }
    }

    /// Inserts `x` and restores the heap order.
    void Push(const T& x)
    {
        _a.push_back(x);
        adjust_up(static_cast<int>(_a.size() - 1));
    }

    /// Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_a.empty());
        using std::swap;
        swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        adjust_down(0);
    }

    /// Number of stored elements.
    size_t Size() const
    {
        return _a.size();
    }

    /// True when the heap holds no elements.
    bool Empty() const
    {
        return _a.empty();
    }

    /// Returns the top element. The heap must not be empty.
    const T& Top() const
    {
        assert(!_a.empty());
        return _a[0];
    }

    /// Prints the underlying array (storage order, not sorted order) to std::cout.
    void Display()
    {
        for (size_t i = 0; i < _a.size(); ++i)
        {
            std::cout << _a[i] << " ";
        }
        std::cout << std::endl;
    }

    /// Sifts the element at index `root` down until neither child should be
    /// above it. `root` is expected to be a non-negative index.
    void adjust_down(int root)
    {
        using std::swap;
        Compare com;
        const size_t count = _a.size();
        size_t parent = static_cast<size_t>(root);
        size_t child = parent * 2 + 1; // left child
        while (child < count)
        {
            // The right child may not exist, so bounds-check it before
            // picking whichever child should be nearer the top.
            if (child + 1 < count && com(_a[child + 1], _a[child]))
            {
                ++child;
            }
            if (!com(_a[child], _a[parent]))
                break;
            swap(_a[child], _a[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    /// Sifts the element at index `child` up until its parent should be above it.
    void adjust_up(int child)
    {
        using std::swap;
        Compare com;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            // Stop only when the order is already satisfied; otherwise keep
            // climbing (the loop previously exited after a single step).
            if (!com(_a[child], _a[parent]))
                break;
            swap(_a[child], _a[parent]);
            child = parent;
        }
    }

protected:
    std::vector<T> _a; // heap array: children of index i live at 2i+1 and 2i+2
};
2,构建哈夫曼树
#include "heap.h"
// One node of a Huffman tree: a weight plus links to its two children and to
// its parent. A freshly constructed node has all three links null.
template<class T>
struct HuffmanTreeNode
{
    T _weight;                     // weight carried by this node
    HuffmanTreeNode<T>* _left;     // left child, null if none
    HuffmanTreeNode<T>* _right;    // right child, null if none
    HuffmanTreeNode<T>* _parent;   // parent node, null if none

    // Stores a copy of `w` (a value-initialised T when omitted) as the weight.
    HuffmanTreeNode(const T& w = T())
        : _weight(w), _left(nullptr), _right(nullptr), _parent(nullptr)
    {
    }
};
template<class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node;
public:
HuffmanTree()
:_root(NULL)
{}
HuffmanTree(const T* a, size_t size)
:_root(NULL)
{
//定义一个内部类
struct NodeLess
{
bool operator()(Node *l, Node *r)const
{
return l->_weight < r->_weight;
}
};
Heap<Node *, NodeLess> minHeap;
//建立结点并放入vector中
for (size_t i = 0; i<size; ++i)
{
Node *tmp = new Node(a[i]);
minHeap.Push(tmp);
}
//取出较小的两个结点作为左右孩子并构建父结点
while (minHeap.Size() > 1)
{
Node *left = minHeap.Top();
minHeap.Pop();
Node *right = minHeap.Top();
minHeap.Pop();
Node *parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
left-> = p_parentarent;
right->_parent = parent;
minHeap.Push(parent);
}
_root = minHeap.Top();
}
HuffmanTree(const T* a, size_t size, const T& invalid)
{
struct NodeLess
{
bool operator()(Node *l, Node *r)const
{
return l->_weight < r->_weight;
}
};
Heap<Node *, NodeLess> minHeap;
//建立结点并放入vector中
for (size_t i = 0; i<size; ++i)
{
if (a[i] != invalid)
{
Node *tmp = new Node(a[i]);
minHeap.Push(tmp);
}
}
//取出较小的两个结点作为左右孩子并构建父结点
while (minHeap.Size() > 1)
{
Node *left = minHeap.Top();
minHeap.Pop();
Node *right = minHeap.Top();
minHeap.Pop();
Node *parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
left->_parent = parent;
right->_parent = parent;
minHeap.Push(parent);
}
_root = minHeap.Top();
}
Node* GetRoot()
{
return _root;
}
void Destroy(Node* &root)
{
if (root == NULL)
return;
Destroy(root->_left);
Destroy(root->_rihgt);
delete root;
root = NULL:
return;
}
protected:
Node *_root;
};
3,生成哈夫曼编码,并进行压缩和解压缩
#include<string>
#include<Windows.h>
#include<assert.h>
#include "huffman_tree.h"
using namespace std;
typedef long long Type; // integer type used for occurrence counts

/// Per-character record: the character, how often it occurs, and its Huffman
/// code as a string of '0'/'1' characters.
///
/// All operators look at `_count` only, which lets CharInfo serve directly as
/// the weight type of a Huffman tree.
struct CharInfo
{
    unsigned char _ch;   // the character (byte value)
    Type _count;         // number of occurrences
    std::string _code;   // Huffman code, empty until assigned

    /// Creates a record with the given count, character 0 and an empty code.
    /// Intentionally not `explicit`: default construction and construction
    /// from a count are both used as plain values.
    CharInfo(Type count = 0)
        : _ch(0)
        , _count(count)
        , _code()
    {}

    /// Returns a record whose count is the sum of both counts; the character
    /// and code of the operands are not carried over.
    CharInfo operator+(const CharInfo& fc) const
    {
        return CharInfo(_count + fc._count);
    }

    /// True when the counts differ. Takes the operand by reference so the
    /// comparison does not copy its code string.
    bool operator!=(const CharInfo& fc) const
    {
        return _count != fc._count;
    }

    /// True when this record's count is smaller than `fc`'s.
    bool operator<(const CharInfo& fc) const
    {
        return _count < fc._count;
    }
};
class FileCompress
{
protected:
CharInfo _infos[256];
public:
//默认的构造函数
FileCompress()
{
for (size_t i = 0; i<256; ++i)
{
_infos[i]._ch = i;
}
}
//生成Huffman_code函数
void GenerateHufffmanCode(HuffmanTreeNode<CharInfo> * root, string code)
{
if (root == NULL)return;
if (root->_left == NULL&&root->_right == NULL)//叶子节点
{
_infos[root->_weight._ch]._code = code;
return;
}
GenerateHufffmanCode(root->_left, code + '0');
GenerateHufffmanCode(root->_right, code + '1');
}
string Compress(const char *filename)
{
assert(filename);
FILE *pf = fopen(filename, "rb");
assert(pf);
//fgetc函数的作用是意为从文件指针stream指向的文件中读取一个字符,读取一个字节后,光标位置后移一个字节,返回值为他所读到的字符,因为返回值要能表示-1,所以返回值类型是int
unsigned char ch = fgetc(pf);
//统计字符出现的次数
while (!feof(pf))//feof检测文件流上的结束标志
{
_infos[ch]._count++;
ch = fgetc(pf);
}
//以该字符出现的次数构建一颗HuffmanTree.
CharInfo invalid; //非法值
HuffmanTree<CharInfo> ht(_infos, 256, invalid);
//生成Huffman编码
string code;
GenerateHufffmanCode(ht.GetRoot(), code);
//压缩文件
fseek(pf, 0, SEEK_SET); //回到文件头
string compressfile = filename;
compressfile += ".compress"; //压缩后的文件名
FILE *fin = fopen(compressfile.c_str(), "wb");
assert(fin);
size_t pos = 0; //记录位数
unsigned char value = 0;
ch = fgetc(pf);
while (!feof(pf))
{
string &code = _infos[ch]._code;
for (size_t i = 0; i<code.size(); ++i)
{
value <<= 1;
if (code[i] == '1')
value |= 1;
else
value |= 0; //do-nothing
++pos;
if (pos == 8) //满一个字节
{
fputc(value, fin);
value = 0;
pos = 0;
}
}
ch = fgetc(pf);
}
if (pos) //解决不足8位的情况.
{
value <<= (8 - pos);
fputc(value, fin);
}
//配置文件--便于重建Huffman树
string configfilename = filename;
configfilename += ".config";
FILE *finconfig = fopen(configfilename.c_str(), "wb");
assert(finconfig);
string line;
char buff[128];
for (size_t i = 0; i<256; ++i)
{
//一行一行的读
if (_infos[i]._count)
{
line += _infos[i]._ch;
line += ",";
line += _itoa(_infos[i]._count, buff, 10);
line += "\n";
//fputs(line.c_str(),finconfig);
fwrite(line.c_str(), 1, line.size(), finconfig);
line.clear();
}
}
fclose(pf);
fclose(fin);
fclose(finconfig);
return compressfile;
}
string UnCompress(const char *filename)
{
assert(filename);
string configfilename = filename;
size_t index = configfilename.rfind(".");
configfilename = configfilename.substr(0, index);
configfilename += ".config";
FILE *foutconfig = fopen(configfilename.c_str(), "rb");
assert(foutconfig);
string line;
//读取配置文件--获取字符出现的次数
unsigned char ch = 0;
while (ReadLine(foutconfig, line))
{
if (line.empty())
{
line += '\n';
continue;
}
//读到空行
ch = line[0];
_infos[ch]._count = atoi(line.substr(2).c_str());
line.clear();
}
//构建Huffman树
CharInfo invalid;
HuffmanTree<CharInfo> hft(_infos, 256, invalid);
//根结点的权值也就是字符出现的次数总和
HuffmanTreeNode<CharInfo> *root = hft.GetRoot();
Type charcount = root->_weight._count;
//解压缩
string uncompressfilename = filename;
index = uncompressfilename.rfind(".");
uncompressfilename = uncompressfilename.substr(0, index);
uncompressfilename += ".uncompress";
FILE *fin = fopen(uncompressfilename.c_str(), "wb");
assert(fin);
//由压缩文件还原文件
string compressfilename = filename;
FILE *fout = fopen(compressfilename.c_str(), "rb");
assert(fout);
HuffmanTreeNode<CharInfo> *cur = root;
int pos = 7;
ch = fgetc(fout);
while (charcount > 0)
{
while (cur)
{
if (cur->_left == NULL && cur->_right == NULL)
{
//叶子结点
fputc(cur->_weight._ch, fin);
cur = root;
--charcount;
if (charcount == 0) //所有的字符都处理完成
break;
}
if (ch & (1 << pos)) //检查字符的每个位
cur = cur->_right; //1向右走
else
cur = cur->_left; //0向左走
--pos;
if (pos < 0) //一个字节解压完成
{
ch = fgetc(fout);
pos = 7;
}
}
}
fclose(foutconfig);
fclose(fin);
fclose(fout);
return uncompressfilename;
}
//读取一行字符并放在line中
bool ReadLine(FILE *fout, string& line)
{
int ch = fgetc(fout);
if (ch == EOF)
return false;
while (ch != EOF && ch != '\n')
{
line += ch;
ch = fgetc(fout);
}
return true;
}
};
4,测试
#include"huffman_code.h"
void testFileCompress()
{
FileCompress fc;
fc.Compress("1.png");
fc.UnCompress("1.png.compress");
}
// Entry point: runs the compression round trip, then issues the shell command
// "pause" before exiting with status 0.
int main()
{
    // testFileCompress1();   // disabled in the original
    testFileCompress();

    static_cast<void>(system("pause")); // result deliberately ignored
    return 0;
}