一、哈夫曼树
1.书上用的是静态链表实现,本文中的哈夫曼树用 排序链表 实现;
2.实现了完整的流程:字符频率统计、构建权值集合、创建哈夫曼树、生成哈夫曼编码,以及对给定字符串的编码与解码。
3.代码依赖的 “SortedList.h” 头文件,见上一篇博文《数据结构之排序单链表》。
二、构建过程
三、代码
//文件名:"HfmTree.h"
#pragma once
#include "SortedList.h" //"C1_Test.h" 排序列表
#include <string>
using namespace std;
/*
. 二叉树应用:哈夫曼树及哈夫曼编码实现
. 存储结构:三叉链表
*/
//哈夫曼树结点
/**
 * Node of the Huffman tree, linked to its parent and to both children.
 *
 * Every field carries a default, so a node obtained with `new HTNode` starts
 * out detached (all links null) with weight 0 and the '\0' character instead
 * of holding indeterminate values.
 */
struct HTNode
{
    char c = '\0';             // character stored in the node
    int weight = 0;            // weight of the node
    HTNode *parent = nullptr;  // parent node
    HTNode *lchild = nullptr;  // left child
    HTNode *rchild = nullptr;  // right child

    /**
     * Stream a node through its pointer.
     *
     * @param out  destination stream
     * @param p    node to print; may be null
     * @return     `out`, to allow chaining
     *
     * A node is written as "(<c>:<weight>)". A null pointer is written as
     * "(null)" rather than being dereferenced.
     */
    friend std::ostream &operator<<(std::ostream &out, HTNode *p)
    {
        if (p == nullptr)
            return out << "(null)";
        out << "(" << p->c << ":" << p->weight << ")";
        return out;
    }
};
/**
 * Huffman tree built from the character frequencies of a sample string,
 * together with the per-character code table derived from that tree.
 *
 * Typical use: construct, call Init() with the sample text, then call
 * Encoding() / Decoding().
 *
 * The class declares no destructor, so the two lists and the tree nodes
 * reached through the pointer members are not released by it.
 */
class HfmTree
{
private:
    /*
    . Frequency table and code table.
    . Slot i of both tables belongs to the character (_START_C + i).
    . The tables start at ' ' (0x20) and run through 0x7F, so upper-case
    . letters, lower-case letters, digits and punctuation all map to a slot
    . inside the arrays. (Basing the table on 'a' would give every upper-case
    . letter a negative index, because 'A'..'Z' sort before 'a'.)
    */
    static const int _ARR_SIZE = 96;        // number of slots: ' ' (0x20) .. 0x7F
    static const char _START_C = ' ';       // character that maps to slot 0
    static const int _MAGNIFICATION = 100;  // scale factor used when counts are normalised
    int charFreqArr[_ARR_SIZE]{0};          // per-character weight, zero-initialised
    string charCodeArr[_ARR_SIZE]{ "" };    // per-character Huffman code
    void _Arr_StatisticCharFreq(string &s); // fill charFreqArr from the characters of s
    /*
    . Weight sets, kept as sorted singly linked lists.
    */
    SortedList<HTNode> * varySet;   // working set that shrinks while the tree is being built
    SortedList<HTNode> * originSet; // leaf nodes, collected while the tree is being built
    /*
    . The Huffman tree itself.
    */
    HTNode * root;            // root of the Huffman tree
    int leafNum;              // leaf-node count
    void _CreateWeightSet();  // turn the non-zero weights into nodes stored in varySet
    void _CreateHfmTree();    // merge the nodes of varySet into a single tree
    void _GenerateHfmCode();  // derive the code of every leaf into charCodeArr
public:
    HfmTree();                  // creates the two empty lists; no tree yet
    void Init(string &s);       // build the tree and the code table from s
    void HfmCodeDisplay();      // print every leaf with its weight and code
    string Encoding(string s);  // text -> string of '0'/'1' characters
    string Decoding(string s);  // string of '0'/'1' characters -> text
};
//文件名:"HfmTree.cpp"
#include "stdafx.h"
#include <iostream>
#include <string>
#include "HfmTree.h"
using namespace std;
/*
. Comparison callback handed to SortedList::Init; orders two nodes by weight.
. Returns 1 when e1 is heavier than e2, -1 when it is lighter, 0 on a tie.
*/
int _HTNode_Compare(HTNode * e1, HTNode *e2)
{
    const int lhs = e1->weight;
    const int rhs = e2->weight;
    if (lhs == rhs)
        return 0;
    return (lhs > rhs) ? 1 : -1;
}
/*
. Default constructor: creates the two weight-set lists, both ordered
. ascending by node weight, and starts with no tree and a leaf count of zero.
*/
HfmTree::HfmTree()
{
    // Working set that is consumed while the tree is built.
    SortedList<HTNode> *working = new SortedList<HTNode>();
    working->Init(_HTNode_Compare, working->_ASC);
    this->varySet = working;

    // Set that receives the leaf nodes.
    SortedList<HTNode> *leaves = new SortedList<HTNode>();
    leaves->Init(_HTNode_Compare, leaves->_ASC);
    this->originSet = leaves;

    // No tree has been built yet.
    this->leafNum = 0;
    this->root = NULL;
}
/*
. Builds the Huffman tree and the code table from the sample text s.
. The four stages run in a fixed order because each one consumes what the
. previous one produced.
*/
void HfmTree::Init(string &s)
{
    this->_Arr_StatisticCharFreq(s); // stage 1: per-character weights
    this->_CreateWeightSet();        // stage 2: weights -> sorted list of nodes
    this->_CreateHfmTree();          // stage 3: sorted list -> tree
    this->_GenerateHfmCode();        // stage 4: tree -> per-character codes
}
/*
. Computes the weight of every character of s into charFreqArr.
.
. s : sample text. Characters that have no slot in the table (slot index
.     outside [0, _ARR_SIZE)) are ignored.
.
. The weight of a character is its share of the counted characters scaled by
. _MAGNIFICATION. A character that occurs at all always gets a weight of at
. least 1: a weight of 0 means "absent" to _CreateWeightSet, which would
. leave the character without a leaf and therefore without a code.
. When no character is counted (for example s is empty) every weight is 0.
*/
void HfmTree::_Arr_StatisticCharFreq(string &s)
{
    int *freq = this->charFreqArr;

    // Start from a clean table so weights from an earlier run are not mixed
    // with the raw counts of this one.
    for (int slot = 0; slot < this->_ARR_SIZE; ++slot)
        freq[slot] = 0;

    // Count occurrences, skipping characters that have no slot.
    int counted = 0;
    for (string::size_type i = 0; i < s.length(); ++i)
    {
        const int slot = s[i] - this->_START_C;
        if (slot < 0 || slot >= this->_ARR_SIZE)
            continue;
        ++freq[slot];
        ++counted;
    }

    // Nothing counted: leave all weights at 0 and avoid dividing by zero.
    if (counted == 0)
        return;

    // Normalise: scale each count to its share of the total.
    for (int slot = 0; slot < this->_ARR_SIZE; ++slot)
    {
        if (freq[slot] == 0)
            continue;
        // Widen before multiplying so large counts cannot overflow int.
        const long long scaled =
            static_cast<long long>(freq[slot]) * this->_MAGNIFICATION / counted;
        freq[slot] = (scaled > 0) ? static_cast<int>(scaled) : 1;
    }
}
void HfmTree::_CreateWeightSet()
{
/*
. 构建哈夫曼树
*/
//哈夫曼结点变量
HTNode * node = NULL;
//指针 p 指向词频数组
int * p = this->charFreqArr;
//遍历词频数组
for (int i = 0; i < this->_ARR_SIZE; i++)
{
if (p[i] == 0)
continue;
//初始化 树结点
node = new HTNode;
node->c = (char)(i + this->_START_C); //取字符
node->weight = p[i]; //取权重
node->parent = NULL;
node->lchild = NULL;
node->rchild = NULL;
//顺序插入 权重集合单链表
this->varySet->Insert(node);
}
//显示集合
this->varySet->Display();
}
void HfmTree::_CreateHfmTree()
{
/*
. 创建哈夫曼树
*/
//初始化 树结点
HTNode *first = NULL, *second = NULL, *newNode = NULL;
//权值集合 元素结点数 只剩一个时,结束
while (this->varySet->Length() > 1)
{
//获取并删除 权值集合前两个元素 (集合升序排列,前两个为权值最小)
first = this->varySet->Delete(1);
second = this->varySet->Delete(1);
//构建 新权值 根结点,并初始化
newNode = new HTNode;
newNode->c = '\0';
newNode->weight = first->weight + second->weight; //权值相加
newNode->parent = NULL;
newNode->lchild = first;
newNode->rchild = second;
//赋值 两个结点的 双亲
first->parent = newNode;
second->parent = newNode;
//并将新结点 顺序插入集合,并显示集合
this->varySet->Insert(newNode);
this->varySet->Display();
//将删除的两个元素结点(非后建的根结点),加入到 初始集合 中,并显示
if (first->c != '\0')
this->originSet->Insert(first);
if (second->c != '\0')
this->originSet->Insert(second);
this->originSet->Display();
}
//取权重集合链 第一个元素 作为 哈夫曼树根
this->root = this->varySet->Delete(1);
}
void HfmTree::_GenerateHfmCode()
{
/*
. 生成哈夫曼编码
*/
HTNode * p = NULL, *q = NULL;
char c = '\0';
//遍历叶子结点(初始权重集合)
for (int i = 0; i < this->originSet->Length(); i++)
{
//获取叶结点
p = this->originSet->Get(i + 1);
//获取字符
c = p->c;
//从叶节点 到 根 的遍历
while (p->parent != NULL)
{
//q 取 p 的根结点
q = p->parent;
if (q->lchild == p)
this->charCodeArr[c - this->_START_C] = "0" + this->charCodeArr[c - this->_START_C];
else
this->charCodeArr[c - this->_START_C] = "1" + this->charCodeArr[c - this->_START_C];
//p 向根移动
p = p->parent;
}
//置空 游走指针
p = NULL;
q = NULL;
}
}
void HfmTree::HfmCodeDisplay()
{
/*
. 显示哈夫曼编码
*/
HTNode *p = NULL;
for (int i = 0; i < this->originSet->Length(); i++)
{
p = this->originSet->Get(i + 1);
cout << "(" << p->c << ":" << p->weight << ":" << this->charCodeArr[p->c - this->_START_C] << ")" << endl;
}
}
/*
. Encodes s with the code table.
.
. s       : text to encode
. returns : the codes of the characters of s, concatenated, as a string of
.           '0' and '1' characters
.
. A character that has no slot in the table contributes nothing to the
. result, the same as a character whose code is empty.
*/
string HfmTree::Encoding(string s)
{
    string encoded;
    for (string::size_type i = 0; i < s.length(); ++i)
    {
        const int slot = s[i] - this->_START_C;
        // Never index outside the code table.
        if (slot < 0 || slot >= this->_ARR_SIZE)
            continue;
        // Append in place rather than rebuilding the whole string each time.
        encoded += this->charCodeArr[slot];
    }
    return encoded;
}
/*
. Decodes a string of '0'/'1' characters by walking the Huffman tree.
.
. s       : code string; '0' selects the left child, any other character
.           selects the right child
. returns : the decoded text
.
. Returns an empty string when there is no tree, or when the root has no
. pair of children to walk into. Trailing bits that stop before a leaf is
. reached are dropped.
*/
string HfmTree::Decoding(string s)
{
    string decoded;
    HTNode *top = this->root;

    // Without a root that branches there is no path to follow.
    if (top == NULL || top->lchild == NULL || top->rchild == NULL)
        return decoded;

    HTNode *cur = top;
    // One step down the tree per input character; the index never runs
    // past the end of s.
    for (string::size_type i = 0; i < s.length(); ++i)
    {
        cur = (s[i] == '0') ? cur->lchild : cur->rchild;

        // A node that lacks a child is a leaf: emit its character and
        // restart from the root for the next code.
        if (cur->lchild == NULL || cur->rchild == NULL)
        {
            decoded += cur->c;
            cur = top;
        }
    }
    return decoded;
}
//文件名:"HfmTree_Test.cpp"
#include "stdafx.h"
#include <iostream>
#include "HfmTree.h"
using namespace std;
/*
. Demo: builds a Huffman tree from a sample string, prints the code table,
. then encodes a second string and decodes it again.
*/
int main()
{
    // Sample text the tree is built from.
    string s = "abcadefa";
    // Automatic object: nothing allocated with new is left unreleased here.
    HfmTree t;
    t.Init(s);
    t.HfmCodeDisplay();

    // Round trip on top of that tree. Only characters that occur in the
    // sample text have a code.
    string s1 = "aaacdeefccccee";
    string s2 = t.Encoding(s1);
    cout << "原文:" << s1 << endl;
    cout << "码文:" << s2 << endl;
    cout << "解码文:" << t.Decoding(s2) << endl;
    return 0;
}