一.优先队列实现(最小堆)
也可以直接使用STL中的std::priority_queue。
#include <iostream>  // std::cout, used by pQueue::Out

// Priority queue implemented as a binary min-heap.
//
// Elements live in heap[1..size]; slot 0 is never used, so the children of
// node i are 2*i and 2*i+1 and its parent is i/2. T must be default
// constructible, copy assignable and comparable with both < and >.
template<class T>
class pQueue
{
private:
    T* heap;   // backing array of Max + 1 slots
    int Max;   // largest usable index (capacity)
    int size;  // number of elements currently stored

    // Replaces the backing array with one of `capacity` usable slots,
    // carrying over heap[1..keep].
    void reallocate(int capacity, int keep);

public:
    pQueue(int max = 10);                    // empty queue with room for `max` elements
    pQueue(const pQueue& other);             // deep copy
    pQueue& operator=(const pQueue& other);  // deep copy
    ~pQueue();

    void heapify(int i);       // sift the element at index i down to its place
    void Build(T* A, int n);   // replace the contents with A[1..n] and heapify
    int LEFT(int i);           // index of the left child of i
    int RIGHT(int i);          // index of the right child of i
    int PARENT(int i);         // index of the parent of i
    void swap(T& a, T& b);     // exchange two elements
    T MAX();                   // remove and return the SMALLEST element (name is historical)
    void Insert(T key);        // add an element
    void Out(int i);           // print the subtree rooted at i, in-order
};

template<class T>
int pQueue<T>::LEFT(int i)
{
    return 2 * i;
}

template<class T>
int pQueue<T>::PARENT(int i)
{
    return i / 2;
}

template<class T>
int pQueue<T>::RIGHT(int i)
{
    return 2 * i + 1;
}

template<class T>
void pQueue<T>::swap(T& a, T& b)
{
    T t = a;
    a = b;
    b = t;
}

template<class T>
pQueue<T>::pQueue(int max)
{
    if (max < 0)
        max = 0;
    // new T[n]() value-initialises every slot; unlike memset this is also
    // valid for element types that are not trivially copyable.
    heap = new T[max + 1]();
    Max = max;
    size = 0;
}

template<class T>
pQueue<T>::pQueue(const pQueue& other)
    : heap(new T[other.Max + 1]()), Max(other.Max), size(other.size)
{
    for (int i = 1; i <= size; i++)
        heap[i] = other.heap[i];
}

template<class T>
pQueue<T>& pQueue<T>::operator=(const pQueue& other)
{
    if (this != &other)
    {
        T* fresh = new T[other.Max + 1]();
        for (int i = 1; i <= other.size; i++)
            fresh[i] = other.heap[i];
        delete[] heap;
        heap = fresh;
        Max = other.Max;
        size = other.size;
    }
    return *this;
}

template<class T>
pQueue<T>::~pQueue()
{
    delete[] heap;
}

template<class T>
void pQueue<T>::reallocate(int capacity, int keep)
{
    T* fresh = new T[capacity + 1]();
    for (int i = 1; i <= keep; i++)
        fresh[i] = heap[i];
    delete[] heap;  // the array came from new[], so it must go through delete[]
    heap = fresh;
    Max = capacity;
}

// Top-down repair: while the element at i is larger than one of its
// children, swap it with the smaller child and continue from there.
template<class T>
void pQueue<T>::heapify(int i)
{
    for (;;)
    {
        int low = i;
        const int l = LEFT(i);
        const int r = RIGHT(i);
        if (l <= size && heap[l] < heap[low])
            low = l;
        if (r <= size && heap[r] < heap[low])
            low = r;
        if (low == i)
            return;
        swap(heap[i], heap[low]);
        i = low;
    }
}

// Replaces the queue contents with A[1..n]. A is read 1-based: A[0] is
// ignored and A[n] must be readable. A null A or n <= 0 empties the queue.
template<class T>
void pQueue<T>::Build(T* A, int n)
{
    if (A == nullptr || n <= 0)
    {
        size = 0;
        return;
    }
    if (n > Max)
        reallocate(2 * n, 0);
    // The input has to be copied whether or not the array grew.
    for (int i = 1; i <= n; i++)
        heap[i] = A[i];
    size = n;
    for (int i = n / 2; i >= 1; i--)  // start at the last non-leaf node
        heapify(i);
}

// Removes and returns the smallest element. On an empty queue a
// value-initialised T is returned and the queue is left unchanged.
template<class T>
T pQueue<T>::MAX()
{
    if (size <= 0)
        return T();
    T top = heap[1];
    heap[1] = heap[size];  // move the last element to the root, then sift down
    size--;
    heapify(1);
    return top;
}

// Appends key and sifts it up; capacity doubles when the array is full.
template<class T>
void pQueue<T>::Insert(T key)
{
    if (size + 1 > Max)
        reallocate(2 * (size + 1), size);
    size++;
    heap[size] = key;
    int i = size;
    while (i > 1 && heap[PARENT(i)] > heap[i])
    {
        swap(heap[i], heap[PARENT(i)]);
        i = PARENT(i);
    }
}

// Prints the subtree rooted at index i in-order as
// "( <left> <- value -> <right> )". Indices outside [1, size] print nothing.
template<class T>
void pQueue<T>::Out(int i)
{
    if (i < 1 || i > size)
        return;
    std::cout << "( ";
    if (LEFT(i) <= size)
        Out(LEFT(i));
    std::cout << " <- " << heap[i] << " -> ";
    if (RIGHT(i) <= size)
        Out(RIGHT(i));
    std::cout << " )";
}
二.霍夫曼编码:
简单记一下什么是霍夫曼编码。对于一些字符,出现的频率分别为W1, W2, W3, ..., Wn;对这些字符使用二进制编码,编码的长分别为L1, L2ML3, ...Ln。霍夫曼编码可以使 W1*L1 + W2 * L2 + W3 * L3 + ... + Wn * Ln 最小。霍夫曼编码利用二叉树实现,最初有n个结点对应你、个字符,权值分别为对应的频率;之后每次从已有的子树种挑选两个权值最小的组成新的子树,两个子树共用一个新生成的根结点产生新子树,新子树的权值为原两个子树的权值之和,最终从根结点到叶节点路径长即为编码长,可规定右侧取1编码,左侧取0编码或其他方式。(叶节点一定对应字符)#include<vector> #include<fstream> #include<map> using namespace std; struct TNode//霍夫曼树结点 { char data;//字符 double weight;//频率 int l;//左结点编号 int r;//右结点编号 int pre;//父节点编号 int index;//编号 friend bool operator>(TNode a, TNode b);//比较 friend bool operator<(TNode a, TNode b);//比较 }; class Huffman { private: int numLeaf;//树的叶节点树,即字符种类树 int numNode;//树的节点数 TNode* Tree;//霍夫曼树 vector<char> data;//字符种类集 vector<int> W;//频率集 vector<char*>S;//S[i]指第i种字符的编码 map<char, int> M;//字符v是第几种 例: index=M[v],则S[index]为v的编码 public: Huffman(); void Bulid(vector<int> W, vector<char> v, int n);//建树 void HuffmanCode();//编码 int visTreeindex(int i); int visTreeLindex(int i); int visTreeRindex(int i); char visTreechar(int i); char visS(int x, int y);//访问S int visM(char c);//访问M ~Huffman();//析构 };
函数定义:
#include"Huffman_.h"
#include"Queue_P.h"
#include<iostream>
using namespace std;

// Nodes are ordered by weight only, which is what the min-heap needs.
bool operator>(TNode a, TNode b)
{
    return a.weight > b.weight;
}

bool operator<(TNode a, TNode b)
{
    return a.weight < b.weight;
}

Huffman::Huffman()
{
    Tree = nullptr;
    numLeaf = 0;
    numNode = 0;
}

// Builds the tree for the first n entries of We (frequencies) and v
// (symbols). Leaves occupy indices [0, n), internal nodes [n, 2n-1), and the
// root is the last node. n is clamped to the length of the shorter vector.
// Any tree or codes from an earlier call are discarded.
void Huffman::Bulid(vector<int> We, vector<char> v, int n)
{
    // Drop everything that described the previous tree.
    for (size_t i = 0; i < S.size(); i++)
        delete[] S[i];
    S.clear();
    M.clear();
    delete[] Tree;
    Tree = nullptr;

    W = We;
    data = v;

    const int available = static_cast<int>(W.size() < data.size() ? W.size() : data.size());
    if (n > available)
        n = available;
    if (n < 0)
        n = 0;
    numLeaf = n;
    numNode = (n > 0) ? 2 * n - 1 : 0;

    Tree = new TNode[numNode + 10];
    for (int i = 0; i < numNode; i++)
    {
        // 0 in pre/l/r means "no link"; node 0 is always a leaf, so it is
        // never anyone's parent, and no node has both children equal to 0.
        Tree[i].pre = 0;
        Tree[i].l = 0;
        Tree[i].r = 0;
        Tree[i].weight = 0;
        Tree[i].data = '\0';
        Tree[i].index = i;
    }

    pQueue<TNode> Q(100);
    for (int i = 0; i < numLeaf; i++)
    {
        Tree[i].weight = W[i];
        Tree[i].data = data[i];
        Q.Insert(Tree[i]);  // every leaf starts out in the priority queue
        M.insert(pair<char, int>(data[i], i));
    }

    for (int i = numLeaf; i < numNode; i++)
    {
        // Merge the two lightest subtrees under the new node i.
        TNode a = Q.MAX();
        TNode b = Q.MAX();
        const int pa = a.index;
        const int pb = b.index;
        Tree[pa].pre = i;
        Tree[pb].pre = i;
        Tree[i].l = pa;
        Tree[i].r = pb;
        Tree[i].weight = Tree[pa].weight + Tree[pb].weight;
        Q.Insert(Tree[i]);
    }
}

// Derives the code of every leaf by walking from the leaf up to the root:
// a left edge contributes '0', a right edge '1'. Codes are rebuilt from
// scratch on every call. Does nothing when no tree has been built.
void Huffman::HuffmanCode()
{
    for (size_t i = 0; i < S.size(); i++)
        delete[] S[i];
    S.clear();
    if (numLeaf <= 0 || Tree == nullptr)
        return;

    // A code has at most numLeaf - 1 bits; one extra slot covers the
    // single-leaf case below and one more holds the terminator.
    const int cap = numLeaf + 1;
    char* t = new char[cap];
    t[cap - 1] = '\0';

    for (int i = 0; i < numLeaf; i++)
    {
        int start = cap - 1;
        int c = i;
        int p = Tree[c].pre;
        while (p > 0)
        {
            start--;
            t[start] = (Tree[p].l == c) ? '0' : '1';
            c = p;
            p = Tree[c].pre;
        }
        if (start == cap - 1)
        {
            // A tree with a single leaf has no edges. Give the symbol the
            // one-bit code "0" so that it still occupies space in the output.
            start--;
            t[start] = '0';
        }

        const int len = cap - start;  // characters including the terminator
        char* s = new char[len];
        for (int k = 0; k < len; k++)
            s[k] = t[start + k];
        S.push_back(s);
    }
    delete[] t;
}

Huffman::~Huffman()
{
    // S may hold fewer entries than numLeaf if HuffmanCode was never called.
    for (size_t i = 0; i < S.size(); i++)
    {
        delete[] S[i];
        S[i] = nullptr;
    }
    delete[] Tree;
}

char Huffman::visS(int x, int y)
{
    return S[x][y];
}

// Returns the position of symbol c, or 0 when c is unknown. Uses find so
// that a miss does not insert a new entry into the map.
int Huffman::visM(char c)
{
    map<char, int>::const_iterator it = M.find(c);
    return it == M.end() ? 0 : it->second;
}

int Huffman::visTreeindex(int i)
{
    return Tree[i].index;
}

char Huffman::visTreechar(int i)
{
    return Tree[i].data;
}

int Huffman::visTreeLindex(int i)
{
    return Tree[i].l;
}

int Huffman::visTreeRindex(int i)
{
    return Tree[i].r;
}
三.压缩:
将文本编码为二进制位流;位数可能不是8的倍数,此时最后一个字节要补0。压缩文件中依次写入:字符种类数、各字符及其频率(解压时用来重建霍夫曼树)、补0的个数、编码后的字节数,以及编码后的二进制数据。
读取文本->计算频率和种类->建立霍夫曼树->向文件写入不同种类字符及其频率->编码为二进制,写入。
#include"Huffman_.h"
#include<vector>
#include<fstream>
#include<iostream>
#include<set>
using namespace std;

// Compresses one file with Huffman coding.
//
// Pipeline (see Compress): read the input -> count symbols and frequencies
// -> build the Huffman tree -> write the symbol table -> write the packed
// bit stream. The two file-name pointers are stored as given and are not
// owned, so they must stay valid for as long as the object is used.
class ZIP
{
private:
    Huffman H;
    vector<int> weight;      // frequency of each distinct symbol, parallel to CHAR
    vector<char> CHAR;       // distinct symbols in order of first appearance
    vector<char> DATA;       // every byte of the input, in order
    char* filename;          // input file name (not owned)
    char* write_filename;    // output file name (not owned)
    int numChar;             // number of distinct symbols
    long long int ALLCHAR;   // total number of bytes read
public:
    void Read_Txt();   // read the input file into DATA
    void ComInit();    // build the Huffman tree and the codes
    void Count();      // fill CHAR, weight and numChar from DATA
    void Compress();   // run the whole pipeline and write the output file
    ZIP(char* s1, char* s2);  // s1: input file name, s2: output file name
    ~ZIP();
};
函数定义:
#include"ZIP_.h"

ZIP::ZIP(char* s1, char* s2) :H()
{
    filename = s1;
    write_filename = s2;
    numChar = 0;
    ALLCHAR = 0;
}

// Builds the Huffman tree for the counted symbols and derives their codes.
void ZIP::ComInit()
{
    H.Bulid(weight, CHAR, numChar);
    H.HuffmanCode();
}

// Reads the whole input file, byte by byte, into DATA. A file that cannot
// be opened yields empty DATA.
void ZIP::Read_Txt()
{
    DATA.clear();
    ALLCHAR = 0;

    fstream os;
    os.open(filename, ios::in | ios::binary);
    char ch;
    while (os.get(ch))
    {
        DATA.push_back(ch);
        ALLCHAR++;
    }
    os.close();
}

// Fills CHAR (distinct symbols in order of first appearance), weight (their
// frequencies) and numChar in a single pass over DATA.
void ZIP::Count()
{
    CHAR.clear();
    weight.clear();
    numChar = 0;

    int slot[256];  // byte value -> position in CHAR, -1 while unseen
    for (int i = 0; i < 256; i++)
        slot[i] = -1;

    for (size_t i = 0; i < DATA.size(); i++)
    {
        const unsigned char byte = static_cast<unsigned char>(DATA[i]);
        if (slot[byte] < 0)
        {
            slot[byte] = numChar;
            CHAR.push_back(DATA[i]);
            weight.push_back(0);
            numChar++;
        }
        weight[slot[byte]]++;
    }
}

// Compresses the input file into the output file.
//
// Output layout, all fields in the machine's native representation:
//   int        number of distinct symbols
//   (char,int) one pair per symbol: the symbol and its frequency
//   int        number of unused padding bits in the last payload byte
//   long long  number of payload bytes
//   bytes      the codes of all input bytes, packed from the high bit down
void ZIP::Compress()
{
    Read_Txt();
    Count();
    if (numChar > 0)  // nothing to build a tree from for an empty input
        ComInit();

    ofstream fout(write_filename, ios::out | ios::binary);
    if (!fout)
        return;  // the output file could not be created

    fout.write(reinterpret_cast<const char*>(&numChar), sizeof(int));
    for (int i = 0; i < numChar; i++)
    {
        fout.write(&CHAR[i], sizeof(unsigned char));
        fout.write(reinterpret_cast<const char*>(&weight[i]), sizeof(int));
    }

    // The payload grows as needed instead of living in a fixed-size buffer.
    vector<char> packed;
    unsigned char current = 0;  // byte being filled
    int now = 7;                // next bit position in `current`, high bit first
    for (size_t i = 0; i < DATA.size(); i++)
    {
        const int symbol = H.visM(DATA[i]);  // looked up once per input byte
        for (int k = 0; H.visS(symbol, k) != '\0'; k++)
        {
            if (H.visS(symbol, k) == '1')
                current = static_cast<unsigned char>(current | (1u << now));
            now--;
            if (now < 0)
            {
                packed.push_back(static_cast<char>(current));
                current = 0;
                now = 7;
            }
        }
    }

    int zero = 0;
    if (now != 7)
    {
        // The last byte is only partly filled: bits now..0 are padding.
        zero = now + 1;
        packed.push_back(static_cast<char>(current));
    }

    long long int pos = static_cast<long long int>(packed.size());
    fout.write(reinterpret_cast<const char*>(&zero), sizeof(int));
    fout.write(reinterpret_cast<const char*>(&pos), sizeof(long long int));
    if (!packed.empty())
        fout.write(&packed[0], static_cast<streamsize>(packed.size()));
}

ZIP::~ZIP()
{
}
四.解压:
// Restores a file written by ZIP: reads the symbol table, rebuilds the same
// Huffman tree and walks it bit by bit over the payload.
//
// The two file-name pointers are stored as given and are not owned. `str` is
// owned and released by the destructor; copying is disabled because a
// member-wise copy would free it twice.
class DeZIP
{
private:
    Huffman H;
    vector<int> weight;          // frequency of each symbol, parallel to CHAR
    vector<char> CHAR;           // symbols in the order stored in the file
    char* filename;              // compressed input file name (not owned)
    char* write_filename;        // output file name (not owned)
    long long int size = 0;      // number of payload bytes
    char* str = nullptr;         // payload bytes
    int numChar = 0;             // number of distinct symbols
    int zero = 0;                // padding bits in the last payload byte
public:
    void Read_Txt();    // read header and payload, rebuild the tree
    void DeCompress();  // decode the payload into the output file
    DeZIP(char* s1, char* s2);  // s1: compressed file name, s2: output file name
    DeZIP(const DeZIP&) = delete;
    DeZIP& operator=(const DeZIP&) = delete;
    ~DeZIP();
};
函数定义:
#include"ZIP_.h"

// Reads the compressed file: symbol table, padding count, payload length
// and payload, then rebuilds the Huffman tree from the symbol table.
// If the file cannot be opened or its header is malformed, the object is
// left describing an empty input (numChar == 0, size == 0).
void DeZIP::Read_Txt()
{
    CHAR.clear();
    weight.clear();
    delete[] str;
    str = nullptr;
    numChar = 0;
    zero = 0;
    size = 0;

    ifstream fin(filename, ios::in | ios::binary);
    if (!fin)
        return;

    int kinds = 0;
    fin.read(reinterpret_cast<char*>(&kinds), sizeof(int));
    if (!fin || kinds < 0 || kinds > 256)  // a byte has at most 256 distinct values
        return;

    vector<char> symbols;
    vector<int> counts;
    for (int i = 0; i < kinds; i++)
    {
        char c = '\0';
        int w = 0;
        fin.read(&c, sizeof(unsigned char));
        fin.read(reinterpret_cast<char*>(&w), sizeof(int));
        if (!fin)
            return;
        symbols.push_back(c);
        counts.push_back(w);
    }

    int padding = 0;
    long long int payload = 0;
    fin.read(reinterpret_cast<char*>(&padding), sizeof(int));
    fin.read(reinterpret_cast<char*>(&payload), sizeof(long long int));
    if (!fin || payload < 0 || padding < 0 || padding > 7)
        return;

    // Never trust the stored length beyond what the file actually contains.
    const streamoff payloadStart = fin.tellg();
    fin.seekg(0, ios::end);
    const streamoff fileEnd = fin.tellg();
    if (payloadStart < 0 || fileEnd < payloadStart)
        return;
    fin.seekg(payloadStart, ios::beg);
    if (payload > static_cast<long long int>(fileEnd - payloadStart))
        payload = static_cast<long long int>(fileEnd - payloadStart);

    CHAR = symbols;
    weight = counts;
    numChar = kinds;
    zero = padding;

    if (numChar > 0)
    {
        H.Bulid(weight, CHAR, numChar);
        H.HuffmanCode();
    }

    str = new char[payload + 1];
    fin.read(str, static_cast<streamsize>(payload));
    size = static_cast<long long int>(fin.gcount());
}

// Decodes the payload into the output file. Starting at the root (the last
// node, index 2*numChar-2), each bit selects the right child (1) or the left
// child (0); on reaching a leaf its symbol is written and the walk restarts
// at the root. The padding bits of the final byte are skipped.
void DeZIP::DeCompress()
{
    Read_Txt();

    // Binary mode needs the bitwise OR of the two flags.
    ofstream out(write_filename, ios::out | ios::binary);
    if (!out)
        return;

    if (numChar == 1)
    {
        // A one-symbol tree has no edges to walk; the frequency table alone
        // says how many copies of the symbol the original file held.
        for (int k = 0; k < weight[0]; k++)
            out.put(CHAR[0]);
        out.close();
        return;
    }
    if (numChar < 2)
    {
        out.close();  // empty or unreadable input: produce an empty file
        return;
    }

    const int top = 2 * numChar - 2;
    int root = top;
    for (long long int i = 0; i < size; i++)
    {
        const unsigned char v = static_cast<unsigned char>(str[i]);
        const int end = (i == size - 1) ? zero : 0;  // lowest real bit in this byte
        for (int j = 7; j >= end; j--)
        {
            if ((v >> j) & 1)
                root = H.visTreeRindex(root);
            else
                root = H.visTreeLindex(root);

            if (H.visTreeRindex(root) == 0 && H.visTreeLindex(root) == 0)
            {
                // Leaf reached: emit its symbol and start the next code,
                // which may begin inside the same byte.
                const char c = H.visTreechar(root);
                out.write(&c, sizeof(unsigned char));
                root = top;
            }
        }
    }
    out.close();
}

DeZIP::DeZIP(char* s1, char* s2)
{
    filename = s1;
    write_filename = s2;
    // The destructor deletes str, so it must never be left indeterminate.
    str = nullptr;
    size = 0;
    numChar = 0;
    zero = 0;
}

DeZIP::~DeZIP()
{
    delete[] str;
}
测试一下:
// Interactive driver: 1 compresses a file, 2 decompresses one, any other
// input (including unreadable input) leaves the loop.
while (1)
{
    cout << "压缩文件请按1,解压文件请按2,退出请按3 ";
    int press = 0;
    cin >> press;

    // Fixed buffers on the stack; every read below is capped with
    // cin.width so that a long path cannot run past the end of the array.
    char s1[100];
    char s2[100];
    if (press == 1)
    {
        cout << "请输入文件路径或程序文件下文件名: ";
        cin.width(sizeof(s1));
        cin >> s1;
        cout << "请输入压缩后文件名: ";
        cin.width(sizeof(s2));
        cin >> s2;
        ZIP Z(s1, s2);
        Z.Compress();
    }
    else if (press == 2)
    {
        cout << "请输入文件路径或程序文件下文件名: ";
        cin.width(sizeof(s1));
        cin >> s1;
        cout << "请输入解压后文件名: ";
        cin.width(sizeof(s2));
        cin >> s2;
        DeZIP DZ(s1, s2);
        DZ.DeCompress();
    }
    else
    {
        break;
    }
}
1.31 MB (1,376,256 字节)