数据结构之二叉树应用(哈夫曼树及哈夫曼编码实现)(C++)

一、哈夫曼树

1.书上用的是静态链表实现,本文中的哈夫曼树用 排序链表 实现;
2.实现了字符频率统计、构建权值集合、创建哈夫曼树、生成哈夫曼编码,以及对给定字符串的编码、解码功能。
3.使用到的 "SortedList.h" 头文件,见上篇博文《数据结构之排序单链表》。


二、构建过程


三、代码

//文件名:"HfmTree.h"
#pragma once
#include "SortedList.h"		//"C1_Test.h" 排序列表
#include <string>
using namespace std;
/*
.	二叉树应用:哈夫曼树及哈夫曼编码实现
.	存储结构:三叉链表
*/

//哈夫曼树结点
struct HTNode
{
	/*
	.	Huffman tree node stored with three links (parent + two children).
	.	Every member has a default value so that a plain `new HTNode` never
	.	yields indeterminate pointers or weights.
	*/
	char c = '\0';					//symbol carried by the node; '\0' is what merged (internal) nodes use
	int weight = 0;					//weight of the node
	HTNode * parent = nullptr;		//parent node, null while the node has none
	HTNode * lchild = nullptr;		//left child
	HTNode * rchild = nullptr;		//right child
	friend std::ostream & operator <<(std::ostream& out, HTNode *p)
	{
		/*
		.	Streams a node given by pointer as "(symbol:weight)".
		.	A null pointer is written as "(null)" instead of being dereferenced.
		.	Returns the stream so that insertions can be chained.
		*/
		if (p == nullptr)
			return out << "(null)";
		out << "(" << p->c << ":" << p->weight << ")";
		return out;
	}
};

/*
.	Huffman coder built on two sorted lists of HTNode pointers.
.	Usage as shown by the test driver: construct, call Init() with a sample
.	string, then Encoding()/Decoding() against the resulting code table.
*/
class HfmTree
{
public:
	HfmTree();										//sets up both lists and an empty tree
	void Init(std::string &s);						//counts symbols in s, then builds the tree and the code table
	void HfmCodeDisplay();							//prints one "(symbol:weight:code)" line per recorded leaf
	std::string Encoding(std::string s);			//maps each character of s to its code and concatenates them
	std::string Decoding(std::string s);			//walks the tree along a string of '0'/'1' characters

private:
	/*
	.	Per-symbol tables.
	.	Slot i of either table belongs to the character (_START_C + i).
	.	NOTE(review): the tables hold 52 slots, but with the 'a'-based index
	.	upper-case letters do not land inside them — confirm the intended
	.	character set.
	*/
	static const int _ARR_SIZE = 52;				//number of slots in both tables
	static const char _START_C = 'a';				//character that maps to slot 0
	static const int _MAGNIFICATION = 100;			//scale applied when counts are normalised to weights
	int charFreqArr[_ARR_SIZE] = {};				//weight per slot, all zero initially
	std::string charCodeArr[_ARR_SIZE];				//code string per slot, all empty initially

	/*
	.	Sorted lists of node pointers, ordered by weight.
	*/
	SortedList<HTNode> * varySet;					//working set that shrinks while subtrees are merged
	SortedList<HTNode> * originSet;					//leaf nodes collected during the merge

	/*
	.	The tree itself.
	*/
	HTNode * root;									//root of the Huffman tree, null before Init()
	int leafNum;									//number of leaves

	void _Arr_StatisticCharFreq(std::string &s);	//fills charFreqArr from s
	void _CreateWeightSet();						//creates one leaf per non-zero weight and inserts it into varySet
	void _CreateHfmTree();							//merges the two lightest nodes until one root remains
	void _GenerateHfmCode();						//derives each leaf's code by climbing to the root
};
//文件名:"HfmTree.cpp"
#include "stdafx.h"
#include <iostream>
#include <string>
#include "HfmTree.h"
using namespace std;

int _HTNode_Compare(HTNode * e1, HTNode *e2)
{
	/*
	.	Comparison callback passed to SortedList::Init.
	.	Orders two nodes by weight:
	.	returns 1 when e1 is heavier, -1 when e1 is lighter, 0 when equal.
	*/
	const int lhs = e1->weight;
	const int rhs = e2->weight;
	//each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1
	return (lhs > rhs) - (lhs < rhs);
}

HfmTree::HfmTree()
	: varySet(new SortedList<HTNode>()),
	  originSet(new SortedList<HTNode>()),
	  root(nullptr),
	  leafNum(0)
{
	/*
	.	Default constructor.
	.	Both lists are allocated above; here they are configured with the
	.	weight comparator and the list's _ASC ordering flag.
	.	The tree starts out empty (no root, zero leaves).
	*/
	this->varySet->Init(_HTNode_Compare, this->varySet->_ASC);
	this->originSet->Init(_HTNode_Compare, this->originSet->_ASC);
}

void HfmTree::Init(string &s)
{
	/*
	.	初始化字符串,并构建哈夫曼树
	*/
	//1.字符频率统计
	_Arr_StatisticCharFreq(s);
	//2.创建权值集合单链
	_CreateWeightSet();
	//3.创建哈夫曼树
	_CreateHfmTree();
	//4.生成哈夫曼编码
	_GenerateHfmCode();
}

void HfmTree::_Arr_StatisticCharFreq(string &s)
{
	/*
	.	Counts how often each character of s occurs and converts the counts
	.	into integer weights: count * _MAGNIFICATION / s.length().
	.	s : sample text; it is only read.
	.
	.	Slot 0 of the table is 'a'. Characters whose slot falls outside
	.	[0, _ARR_SIZE) are skipped instead of written out of bounds; this
	.	includes upper-case letters, digits and spaces, which precede 'a'.
	.	Skipped characters still count towards the divisor, exactly as before.
	.	An empty string leaves the table untouched (no division by zero).
	.
	.	NOTE(review): integer division truncates, so a character rarer than
	.	1/_MAGNIFICATION of the text ends up with weight 0 and is later
	.	treated as absent.
	*/
	int * freq = this->charFreqArr;
	const int total = (int)s.length();	//total number of characters
	if (total == 0)
		return;
	//count occurrences
	for (int i = 0; i < total; i++)
	{
		const int slot = s[i] - this->_START_C;
		if (slot < 0 || slot >= this->_ARR_SIZE)
			continue;					//character has no slot in the table
		freq[slot]++;
	}
	//normalise counts to scaled weights
	for (int i = 0; i < this->_ARR_SIZE; i++)
	{
		freq[i] = freq[i] * this->_MAGNIFICATION / total;
	}
}

void HfmTree::_CreateWeightSet()
{
	/*
	.	构建哈夫曼树
	*/
	//哈夫曼结点变量
	HTNode * node = NULL;
	//指针 p 指向词频数组
	int * p = this->charFreqArr;
	//遍历词频数组
	for (int i = 0; i < this->_ARR_SIZE; i++)
	{
		if (p[i] == 0)
			continue;
		//初始化 树结点
		node = new HTNode;
		node->c = (char)(i + this->_START_C);	//取字符
		node->weight = p[i];					//取权重
		node->parent = NULL;
		node->lchild = NULL;
		node->rchild = NULL;
		//顺序插入 权重集合单链表
		this->varySet->Insert(node);
	}
	//显示集合
	this->varySet->Display();
}

void HfmTree::_CreateHfmTree()
{
	/*
	.	创建哈夫曼树
	*/
	//初始化 树结点
	HTNode *first = NULL, *second = NULL, *newNode = NULL;
	//权值集合 元素结点数 只剩一个时,结束
	while (this->varySet->Length() > 1)
	{
		//获取并删除 权值集合前两个元素 (集合升序排列,前两个为权值最小)
		first = this->varySet->Delete(1);
		second = this->varySet->Delete(1);
		//构建 新权值 根结点,并初始化
		newNode = new HTNode;
		newNode->c = '\0';
		newNode->weight = first->weight + second->weight;	//权值相加
		newNode->parent = NULL;
		newNode->lchild = first;
		newNode->rchild = second;
		//赋值 两个结点的 双亲
		first->parent = newNode;
		second->parent = newNode;
		//并将新结点 顺序插入集合,并显示集合
		this->varySet->Insert(newNode);
		this->varySet->Display();
		//将删除的两个元素结点(非后建的根结点),加入到 初始集合 中,并显示
		if (first->c != '\0')
			this->originSet->Insert(first);
		if (second->c != '\0')
			this->originSet->Insert(second);
		this->originSet->Display();
	}
	//取权重集合链 第一个元素 作为 哈夫曼树根
	this->root = this->varySet->Delete(1);
}

void HfmTree::_GenerateHfmCode()
{
	/*
	.	生成哈夫曼编码
	*/
	HTNode * p = NULL, *q = NULL;
	char c = '\0';
	//遍历叶子结点(初始权重集合)
	for (int i = 0; i < this->originSet->Length(); i++)
	{
		//获取叶结点
		p = this->originSet->Get(i + 1);
		//获取字符
		c = p->c;
		//从叶节点 到 根 的遍历
		while (p->parent != NULL)
		{
			//q 取 p 的根结点
			q = p->parent;
			if (q->lchild == p)
				this->charCodeArr[c - this->_START_C] = "0" + this->charCodeArr[c - this->_START_C];
			else
				this->charCodeArr[c - this->_START_C] = "1" + this->charCodeArr[c - this->_START_C];
			//p 向根移动
			p = p->parent;
		}
		//置空 游走指针
		p = NULL;
		q = NULL;
	}
}

void HfmTree::HfmCodeDisplay()
{
	/*
	.	显示哈夫曼编码
	*/
	HTNode *p = NULL;
	for (int i = 0; i < this->originSet->Length(); i++)
	{
		p = this->originSet->Get(i + 1);
		cout << "(" << p->c << ":" << p->weight << ":" <<  this->charCodeArr[p->c - this->_START_C] << ")" << endl;
	}
}

string HfmTree::Encoding(string s)
{
	/*
	.	Encodes s with the current code table.
	.	s       : text to encode.
	.	returns : the codes of all characters of s, concatenated in order.
	.
	.	A character whose slot lies outside the table is skipped rather than
	.	used as an out-of-bounds index. A character inside the table that
	.	has no code contributes an empty string. In both cases it is
	.	silently dropped from the output.
	*/
	string encoded;
	for (char ch : s)
	{
		const int slot = ch - this->_START_C;
		if (slot < 0 || slot >= this->_ARR_SIZE)
			continue;							//no slot for this character
		encoded += this->charCodeArr[slot];		//append in place, no temporary copy of the result
	}
	return encoded;
}

string HfmTree::Decoding(string s)
{
	/*
	.	Decodes a string of code characters by walking the tree.
	.	s       : code text; '0' selects the left child, any other character
	.	          selects the right child (as before).
	.	returns : the symbols of the leaves reached, in order.
	.
	.	Exactly one character of s is consumed per step, so the walk can
	.	never read past the end of s. Trailing bits that stop short of a
	.	leaf are discarded.
	.	Nothing can be decoded, and an empty string is returned, when there
	.	is no tree or when the root itself is a leaf: no bit leads anywhere,
	.	and the old loop never advanced in that case.
	*/
	string decoded;
	HTNode * const top = this->root;
	if (top == nullptr || top->lchild == nullptr || top->rchild == nullptr)
		return decoded;
	HTNode * cur = top;
	for (char bit : s)
	{
		// 0|left subtree  1|right subtree
		cur = (bit == '0') ? cur->lchild : cur->rchild;
		//a node lacking a child is a leaf: emit its symbol and restart at the root
		if (cur->lchild == nullptr || cur->rchild == nullptr)
		{
			decoded += cur->c;
			cur = top;
		}
	}
	return decoded;
}
//文件名:"HfmTree_Test.cpp"
#include "stdafx.h"
#include <iostream>
#include "HfmTree.h"
using namespace std;

int main()
{
	/*
	.	Demo driver: builds a Huffman tree from a sample string, prints the
	.	code table, then encodes a second string and decodes it again.
	.	The tree object lives on the stack; the earlier `new HfmTree()` was
	.	never deleted.
	*/
	//build the tree from s
	string s = "abcadefa";
	HfmTree t;
	t.Init(s);
	t.HfmCodeDisplay();

	//round-trip test on the tree built above;
	//s1 may only contain characters that occurred in s
	string s1 = "aaacdeefccccee";
	string s2 = t.Encoding(s1);
	cout << "原文:" << s1 << endl;
	cout << "码文:" << s2 << endl;
	cout << "解码文:" << t.Decoding(s2) << endl;

	return 0;
}

猜你喜欢

转载自blog.csdn.net/weixin_39469127/article/details/80573973