Huffman编码的实现
哈夫曼编码(Huffman Coding),又称霍夫曼编码,是可变字长编码(VLC)的一种。Huffman于1952年提出该编码方法:它完全依据字符出现的概率来构造异字头(即前缀码,任何一个码字都不是另一个码字的前缀)且平均长度最短的码字,因此有时也称之为最佳编码。
最小堆的实现
#include <string.h>

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <utility>
#include <vector>
using namespace std;
/// Binary min-heap stored in a std::vector.
///
/// Elements are ordered with `operator>` on T: the element for which no other
/// element compares smaller is kept at index 0. T must be copyable and must
/// provide `operator>` callable on non-const lvalues.
template<class T>
class Heap {
public:
    /// Creates an empty heap.
    Heap()
    {}

    /// Builds a heap from the first `size` elements of `array`.
    ///
    /// @param array pointer to at least `size` elements (copied, not owned)
    /// @param size  number of elements to copy
    Heap(const T* array, std::size_t size)
        : v(array, array + size)
    {
        heapify();
    }

    /// Inserts a copy of `data` and restores the heap order.
    void Push(T data)
    {
        v.push_back(data);
        siftUp(v.size() - 1);
    }

    /// Removes and returns the smallest element.
    ///
    /// @throws std::out_of_range if the heap is empty (the previous version
    ///         fell off the end of the function without returning a value).
    T Pop()
    {
        if (v.empty())
            throw std::out_of_range("Heap::Pop called on an empty heap");

        T smallest = v.front();
        if (v.size() > 1) {
            // Move the last element to the root so the tail can be dropped.
            using std::swap;
            swap(v.front(), v.back());
        }
        v.pop_back();
        siftDown(0);
        return smallest;
    }

    /// Returns true when the heap holds no elements.
    bool Empty() const
    {
        return v.empty();
    }

    /// Returns the number of stored elements.
    std::size_t Size() const
    {
        return v.size();
    }

    /// Returns a copy of the smallest element without removing it.
    ///
    /// @throws std::out_of_range if the heap is empty.
    T Top() const
    {
        if (v.empty())
            throw std::out_of_range("Heap::Top called on an empty heap");
        return v.front();
    }

private:
    /// Establishes the heap order over the whole vector by sifting down every
    /// node that has at least one child, from the last such node to the root.
    void heapify()
    {
        for (std::size_t i = v.size() / 2; i-- > 0; ) {
            siftDown(i);
        }
    }

    /// Moves the element at `parent` down until neither child is smaller.
    void siftDown(std::size_t parent)
    {
        using std::swap;
        const std::size_t size = v.size();
        for (;;) {
            std::size_t child = parent * 2 + 1;
            if (child >= size)
                return;
            // Pick the smaller of the two children.
            if (child + 1 < size && v[child] > v[child + 1])
                ++child;
            if (!(v[parent] > v[child]))
                return;
            swap(v[parent], v[child]);
            parent = child;
        }
    }

    /// Moves the element at `child` up while it is smaller than its parent.
    void siftUp(std::size_t child)
    {
        using std::swap;
        while (child > 0) {
            const std::size_t parent = (child - 1) / 2;
            if (!(v[parent] > v[child]))
                return;
            swap(v[parent], v[child]);
            child = parent;
        }
    }

private:
    std::vector<T> v;  // implicit binary tree: children of i are 2i+1 and 2i+2
};
Huffman编码的实现
#include "MinHeap.h"
#include <string>
/// Abstract node of a Huffman tree.
///
/// Concrete nodes are handled through HuffNode pointers, so the destructor is
/// virtual to make deletion through a base pointer well-defined.
template <typename E>
class HuffNode {
public:
    virtual ~HuffNode() {}

    /// Returns the frequency (weight) stored in this node.
    virtual int getWeight() = 0;

    /// Returns true if this node is a leaf.
    virtual bool isLeaf() = 0;

    /// Fills the code table for the subtree rooted at this node.
    ///
    /// @param code      code prefix accumulated on the path to this node
    /// @param nameTable receives the symbols
    /// @param codeTable receives the code of each symbol (parallel to nameTable)
    /// @param fre       receives the frequency of each symbol (parallel to nameTable)
    virtual void findNode(std::string code, std::vector<char> &nameTable, std::vector<std::string> &codeTable, std::vector<int> &fre) = 0;

    /// Decodes the encoded message `str` starting at index `poi`.
    ///
    /// @param root root of the whole tree, used to restart after each symbol
    /// @param str  the encoded message
    /// @param poi  index of the next unread character; advanced by the callee
    virtual void help(HuffNode<E> *root, std::string str, int &poi) = 0;
};
/// Leaf of a Huffman tree: holds one symbol and its frequency.
template <typename E>
class LeafNode : public HuffNode<E> {
public:
    E value;     // symbol stored in this leaf
    int weight;  // frequency of the symbol

    LeafNode(const E& val, int freq) : value(val), weight(freq) {}

    /// Returns the frequency of the stored symbol.
    int getWeight() override {
        return weight;
    }

    /// Appends this leaf's symbol, its code and its frequency to the tables.
    ///
    /// A leaf whose symbol equals the value-initialized E (e.g. '\0' for
    /// char) is treated as empty and skipped. The comparison uses E() rather
    /// than NULL because NULL is a pointer constant, not a symbol value.
    void findNode(std::string code, std::vector<char> &nameTable, std::vector<std::string> &codeTable, std::vector<int> &fre) override {
        if (value != E()) {
            nameTable.push_back(value);
            codeTable.push_back(code);
            fre.push_back(weight);
        }
    }

    /// Always true for a leaf.
    bool isLeaf() override {
        return true;
    }

    /// Prints the decoded symbol, then restarts decoding from `root` while
    /// unread characters remain in `str`.
    ///
    /// @param root root of the whole tree
    /// @param str  the encoded message
    /// @param poi  index of the next unread character
    void help(HuffNode<E> *root, std::string str, int &poi) override {
        std::cout << value << " ";
        // A tree whose root is a leaf consumes no characters, so restarting
        // from it would recurse forever; stop after the single symbol.
        if (root == nullptr || root->isLeaf()) return;
        if (poi >= 0 && static_cast<std::size_t>(poi) < str.length()) {
            root->help(root, str, poi);
        }
    }
};
/// Internal node of a Huffman tree: two children and their combined weight.
template <typename E>
class IntlNode : public HuffNode<E> {
public:
    HuffNode<E> *lc;  // left child, reached by code character '0'
    HuffNode<E> *rc;  // right child, reached by code character '1'
    int weight;       // sum of the children's weights at construction time

    /// Joins two subtrees; both pointers must be non-null.
    IntlNode(HuffNode<E> *l, HuffNode<E> *r)
        : lc(l), rc(r), weight(l->getWeight() + r->getWeight()) {}

    /// Always false for an internal node.
    bool isLeaf() override {
        return false;
    }

    /// Replaces the left child. The stored weight is not recomputed.
    void setLeft(HuffNode<E> *b) {
        lc = b;
    }

    /// Replaces the right child. The stored weight is not recomputed.
    void setRight(HuffNode<E> *b) {
        rc = b;
    }

    /// Returns the combined weight stored in this node.
    int getWeight() override {
        return weight;
    }

    /// Extends the code prefix with '0' for the left subtree and '1' for the
    /// right subtree, visiting the left subtree first.
    void findNode(std::string code, std::vector<char> &nameTable, std::vector<std::string> &codeTable, std::vector<int> &fre) override {
        if (lc != nullptr) lc->findNode(code + '0', nameTable, codeTable, fre);
        if (rc != nullptr) rc->findNode(code + '1', nameTable, codeTable, fre);
    }

    /// Consumes one character of `str` and descends: '0' goes left, any other
    /// character goes right.
    ///
    /// If the message ends in the middle of a code there is nothing left to
    /// consume, so decoding stops instead of reading past the end of `str`.
    ///
    /// @param root root of the whole tree, forwarded to the children
    /// @param str  the encoded message
    /// @param poi  index of the next unread character; advanced by one
    void help(HuffNode<E> *root, std::string str, int &poi) override {
        if (poi < 0 || static_cast<std::size_t>(poi) >= str.length()) return;
        HuffNode<E> *next = (str[poi++] == '0') ? lc : rc;
        if (next != nullptr) next->help(root, str, poi);
    }
};
template <typename E>
class HuffTree {
private:
public:
HuffNode<E>* Root;//根节点
HuffTree() {
Root = NULL;
}
HuffTree(HuffTree<E>* root) {
Root = root;
}
HuffTree(E val, int freq) {
Root = new LeafNode<E>(val, freq);
}
HuffTree(HuffTree<E>* l, HuffTree<E>*r) {
Root = new IntlNode<E>(l->root(), r->root());
}
//返回根节点
HuffNode<E>* root() { return Root; }
//返回频率
int weight() { return Root->getWeight(); }
//运算符重载
bool operator <=(HuffTree<E> &r) {
return weight() <= r.weight();
}
bool operator <(HuffTree<E> &r) {
return weight()<r.weight();
}
bool operator >=(HuffTree<E> &r) {
return weight() >= r.weight();
}
bool operator >(HuffTree<E> &r) {
return weight()>r.weight();
}
};
/// Builds the Huffman tree from the trees in `minHeap` and fills the code table.
///
/// The two lightest trees are repeatedly removed and merged (the lighter one
/// becomes the left child) until one tree remains; that tree stays in
/// `minHeap`. The code table is then produced by walking it from the root.
///
/// @param minHeap heap of single-leaf trees; holds exactly the final tree on return
/// @param str     code prefix to start from (normally empty)
/// @param str1    receives the symbols
/// @param str2    receives the code of each symbol
/// @param fre     receives the frequency of each symbol
/// @return a newly allocated handle to the final tree, owned by the caller
///         (release with delete), or null if `minHeap` was empty
template <typename E>
HuffTree<E>* buildHuff(Heap<HuffTree<E>> &minHeap, std::string str, std::vector<char> &str1, std::vector<std::string> &str2, std::vector<int> &fre) {
    if (minHeap.Empty()) {
        return nullptr;
    }
    while (minHeap.Size() > 1) {
        // Keep the popped trees as locals: Pop() returns by value, so taking
        // the address of its result would leave a dangling pointer.
        HuffTree<E> lightest = minHeap.Pop();
        HuffTree<E> secondLightest = minHeap.Pop();
        HuffTree<E> merged(&lightest, &secondLightest);
        minHeap.Push(merged);
    }
    HuffTree<E> finalTree = minHeap.Top();
    // Walk through the HuffNode interface so any symbol type E and a
    // single-leaf tree are handled alike.
    HuffNode<E> *root = finalTree.root();
    if (root != nullptr) {
        root->findNode(str, str1, str2, fre);
    }
    return new HuffTree<E>(finalTree);
}
template <typename E>
void createNode(HuffNode<E> *node, string code, vector<char> &nameTable, vector<string> &codeTable, vector<int> &fre) {
if (node != NULL) {
node->findNode(code, nameTable, codeTable, fre);
}
};
测试函数
int main() {
HuffTree<char> a('a', 7);
HuffTree<char> b('b', 9);
HuffTree<char> c('c', 6);
HuffTree<char> d('d', 2);
HuffTree<char> e('e', 31);
HuffTree<char> f('f', 3);
HuffTree<char> min[6] = { a,b,c,d,e,f };
Heap<HuffTree<char>> heap(min, 6);
//cout<<heap.Top().weight()<<endl;
//cout << heap.Size();
string str;
vector<char> str1;
vector<string> str2;
vector<int> fre;
HuffTree<char> *final(buildHuff(heap, str, str1, str2, fre));
cout << "测试数据为:a 7 b 9 c 6 d 2 e 31 f 3" << endl;
cout << "哈夫曼编码为:" << endl;
for (int i = 0; i < 6; i++) {
cout << str1[i] << " " << str2[i] << endl;
}
string temp;
cout << "请输入一段电文:";
cin >> temp;
int curr = 0;
heap.Top().root()->help(heap.Top().root(), temp, curr);
cout << "平均长度为";
double sum = 0;
for (int i = 0; i < 6; i++) {
sum += str2[i].length()*fre[i];
}
cout << sum << "/";;
cout << heap.Top().root()->getWeight() << "=";
cout << sum / heap.Top().root()->getWeight();
//cout << heap.root().weight();
//cout << endl;
//cout << heap.deleteTop().weight() << endl;
}
实验结果