实验要求

压缩软件是利用特定算法来压缩数据的工具，压缩后生成的文件称为压缩包(archive)。如果想使用其中的数据，就得用压缩软件对数据进行解压。利用压缩软件对文件中重复的数据进行压缩，可以减小文件中的字节总数，使文件能够通过互联网连接实现更快传输，此外还可以减少文件的磁盘占用空间。常用的压缩软件有 rar、zip 等。

压缩可以分为无损压缩与有损压缩两种。无损压缩后的文件，经过解压能够完全恢复原始数据；有损压缩的文件则无法完全恢复。rar、zip 等格式都是无损压缩格式。音乐文件格式 mp3、图片文件格式 jpg 都是有损压缩格式。

计算机文件是由一个个字节组成的，1 个字节有 0~255 共 256 种可能的值，每个字节的编码长度都是 8 位。由于文件中的字节总是会重复出现，可以对不同的字节设计长度不等的编码，让出现次数较多的字节采用尽可能短的编码，那么文件编码的总长便可减少。

统计文件中 256 种不同的字节重复的次数，以每种字节重复的次数作为权值(weight)，构造一棵有 256 个叶子节点的二叉树。若带权路径长度达到最小，称这样的二叉树为最优二叉树，即 Huffman 树(Huffman tree)。

Huffman 树从根到每个叶子都有一条路径。对路径上的各分支，约定指向左子树根的分支编码为“0”，指向右子树根的分支编码为“1”。从根到每个叶子相应路径上的“0”和“1”组成的序列，就是这个叶子节点的编码，称为 Huffman 编码。

功能要求：使用 Huffman 压缩算法，对一幅 BMP 格式的图片文件进行压缩。图片文件名为“Pic.bmp”，内容如下图所示。压缩后保存为“Pic.bmp.huf”文件。
（原文此处为 Pic.bmp 的示意图，图片未随文本保留。）

效果展示

哈夫曼图片压缩

（原文此处为程序运行效果截图，图片未随文本保留。）

源码

Compress.cpp

#define _CRT_SECURE_NO_WARNINGS
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <stdlib.h>
#include <string>
#include "Compress.h"
#include"Huffman.h"
#include"global.h"

using namespace std;

// Number of distinct byte values (0..255). Used in this file as the length of
// the per-byte weight table and as the base size of the bit-string work buffer.
const int SIZE = 256;

//实现文件压缩
int Compress(const char* pFilename)
{
    
    
	cout << endl;
	//打开并扫描文件
	cout << "正在读取文件……" << endl << endl;
	int weight[256] = {
    
     0 };
	FILE* in = fopen(pFilename, "rb");

	int tempch;
	//获取权重
	while ((tempch = getc(in)) != EOF)
		weight[tempch]++;

	int temp;
	cout << "输入数字“1”显示256种字节出现次数，输入其它数字则不显示！" << endl;
	cout << "请输入您的选择:";
	cin >> temp;
	cout << endl;
	if (temp == 1) {
    
    
		//测试，显示256种字节出现的次数
		showWeight(weight);
		cout << endl;
	}

	cout << "文件读取完毕！\n" << endl;
	//关闭文件
	fclose(in);

	//将编码生成Huffman树
	//Huffman树共有n个叶子节点
	int n = 256;
	//那么就有2n+1个节点
	int m = 2 * n - 1;
	//定义Huffman树
	HuffmanTree pHT = new HTNode[m + 1];
	CreateHuffmanTree(pHT, weight, n);

	int temp1;
	cout << "输入数字“1”显示Huffman 树每个节点的信息，输入其它数字则不显示！" << endl;
	cout << "请输入您的选择:";
	cin >> temp1;
	cout << endl;
	if (temp1 == 1) {
    
    
		//测试，输出 Huffman 树每个节点的信息
		TestHufTree(pHT);
		cout << endl;
	}

	//生成Huffman编码
	char** pHC = new char* [n + 1]; //编码
	for (int i = 1; i <= n; i++)
		pHT[i].weight = weight[i - 1];
	HuffmanCoding(pHC, pHT);

	int temp2;
	cout << "输入数字“1”显示字节的Huffman编码信息，输入其它数字则不显示！" << endl;
	cout << "请输入您的选择:";
	cin >> temp2;
	cout << endl;
	if (temp2 == 1) {
    
    
		//测试，显示字节的Huffman编码信息
		cout << "\n哈夫曼树的编码信息为：" << endl;
		cout << "Byte\tHuffmanCode" << endl;
		TestHufCode(511, pHT, pHC);
		cout << endl;
	}

	//计算编码缓冲区大小
	int nSize = 0;
	for (int i = 0; i < 256; i++)
		nSize += weight[i] * strlen(pHC[i + 1]);
	nSize = (nSize % 8) ? nSize / 8 + 1 : nSize / 8;


	//对编码文件进行压缩
	char* pBuffer = NULL;
	pBuffer = new char[nSize];
	memset(pBuffer, 0, (nSize) * sizeof(char));
	Encode(pFilename, pHC, pBuffer, nSize);
	if (!pBuffer) {
    
    
		return ERROR;
	}

	HEAD sHead;
	InitHead(pFilename, sHead);
	cout << "原文件大小：" << sHead.length << "字节" << endl;
	int afterlen = WriteFile(pFilename, sHead, pBuffer, nSize);
	cout << "压缩后文件大小：" << afterlen << "字节" << endl;
	cout << "压缩比率：" << (double)afterlen * 100 / sHead.length << "%" << endl;

	delete pHT;
	delete[] pHC;
	delete pBuffer;

	return OK;
}

//扫描文件和初始化头文件的信息
int InitHead(const char* pFilname, HEAD& sHead)
{
    
    
	//文件类型
	strcpy(sHead.type, "HUF");
	//源文件长度
	sHead.length = 0;
	for (int i = 0; i < SIZE; i++)
		//权值
		sHead.weight[i] = 0;

	//以二进制流形式打开文件
	FILE* in = fopen(pFilname, "rb");

	//扫描文件，获得权重
	int ch;
	while ((ch = fgetc(in)) != EOF) {
    
    
		sHead.weight[ch]++;
		sHead.length++;
	}

	//关闭文件
	fclose(in);
	in = NULL;
	return OK;
}

//实现压缩编码
int Encode(const char* pFilname, const HuffmanCode pHC, char* pBuffer, const int nSize)
{
    
    
	//打开文件
	FILE* in = fopen(pFilname, "rb");

	//开辟缓冲区ni
	pBuffer = (char*)malloc(nSize * sizeof(char));
	if (!pBuffer)
		cout << "开辟缓冲区失败!" << endl;

	//工作区
	char cd[SIZE] = {
    
     0 };
	//缓冲区指针
	int pos = 0;
	int ch;

	//扫描文件
	while ((ch = fgetc(in)) != EOF) {
    
    
		strcat(cd, pHC[ch + 1]);
		//压缩编码
		while (strlen(cd) >= 8) {
    
    
			pBuffer[pos++] = Str2byte(cd);
			for (int i = 0; i < SIZE - 8; i++) {
    
    
				cd[i] = cd[i + 8];
			}
		}
	}
	if (strlen(cd) > 0) {
    
    
		pBuffer[pos++] = Str2byte(cd);
	}

	fclose(in);

	return OK;
}

//生成压缩文件
int WriteFile(const char* pFilename, const HEAD sHead, const char* pBuffer, const int nSize)
{
    
    
	//生成文件名
	char filename[256] = {
    
     0 };
	strcpy(filename, pFilename);
	strcat(filename, ".huf");

	//以二进制流形式打开文件
	FILE* out = fopen(filename, "wb");

	//写文件
	fwrite(&sHead, sizeof(HEAD), 1, out);

	//写压缩后的编码
	fwrite(pBuffer, sizeof(char), nSize, out);
	//关闭文件，释放文件指针
	fclose(out);
	out = NULL;

	cout << "生成压缩文件：" << filename << endl;
	int len = sizeof(HEAD) + strlen(pFilename) + 1 + nSize;
	return len;
}

// Converts up to eight '0'/'1' characters into one byte.
//
// The first character becomes the most significant bit. Only '1' sets a bit;
// any other character counts as 0. If the string ends before eight characters
// have been read, the missing low-order bits are 0 and nothing past the
// terminating NUL is read. A null pointer yields 0.
//
// pBinStr: NUL-terminated bit string; characters after the eighth are ignored.
// Returns the packed byte.
char Str2byte(const char* pBinStr)
{
	// Shift in an unsigned type so that the top bit never touches a sign bit.
	unsigned char b = 0x00;
	bool ended = !pBinStr;
	for (int i = 0; i < 8; i++) {
		// Make room for the next bit.
		b = static_cast<unsigned char>(b << 1);
		if (ended)
			continue;
		if (pBinStr[i] == '\0') {
			ended = true;	// remaining positions are padding zeros
		}
		else if (pBinStr[i] == '1') {
			b = static_cast<unsigned char>(b | 0x01);
		}
	}
	return static_cast<char>(b);
}

Compress.h

#pragma once
#include"Huffman.h" //Encode函数声明会用到HuffmanCode

// Header record of a compressed file.
// WriteFile() stores this struct verbatim with fwrite(&sHead, sizeof(HEAD), ...),
// so the archive starts with exactly the in-memory bytes of this struct.
struct HEAD
{


	char type[4];		// file type tag; InitHead() stores "HUF" plus the terminating NUL
	int length;			// length of the source file in bytes
	int weight[256];	// weight[b] = number of occurrences of byte value b in the source file
};

// Compresses the file pFilename with Huffman coding.
// Returns OK on success and ERROR on failure.
int Compress(const char* pFilename);

// Scans the source file pFilname and fills sHead: type tag "HUF", file length
// in bytes and the number of occurrences of every byte value.
int InitHead(const char* pFilname, HEAD& sHead);

// Encodes the bytes of the file pFilname with the Huffman codes in pHC (the
// code of byte value b is pHC[b + 1]), packing eight code bits per output byte.
// nSize is the size of the encoded output in bytes.
int Encode(const char* pFilname, const HuffmanCode pHC, char* pBuffer, const int nSize);

// Converts the first eight characters of a '0'/'1' string into one byte;
// the first character becomes the most significant bit.
char Str2byte(const char* pBinStr);

// Creates the compressed file "<pFilename>.huf" from the header sHead followed
// by nSize bytes of pBuffer, and returns a byte count for the archive.
int WriteFile(const char* pFilename, const HEAD sHead, const char* pBuffer, const int nSize);

global.h

#pragma once

// Status code returned by functions that completed successfully.
#define OK 1
// Status code returned by functions that failed.
#define ERROR 0

Huffman.cpp

#define _CRT_SECURE_NO_WARNINGS
#include<iostream>
#include<malloc.h>
#include"Huffman.h"
#include"global.h"

using namespace std;

// Prints a two-column table with the number of occurrences of each of the
// 256 byte values: the byte in hexadecimal and its weight in decimal.
void showWeight(int weight[])
{
	std::cout << "原文件每个字符的权值为：" << std::endl;
	std::cout << "Byte\t" << "Weight\t" << std::endl;

	int byteValue = 0;
	while (byteValue < 256) {
		printf("0x%02X\t%d\n", byteValue, weight[byteValue]);
		++byteValue;
	}
}

// Builds a Huffman tree in the array pHT[1 .. 2n-1].
//
// pHT:    node array with room for indices 0 .. 2n-1 (index 0 is not used).
// weight: weight[k] is the weight of leaf k + 1.
// n:      number of leaves.
// Leaves occupy indices 1..n; every internal node n+1 .. 2n-1 joins the two
// lightest nodes that have no parent yet. Always returns OK.
int CreateHuffmanTree(HuffmanTree pHT, int weight[], int n)
{
	const int nodeCount = 2 * n - 1;
	int lightest, nextLightest;

	// Reset all nodes: leaves take their weight from the table, internal
	// nodes start at weight 0; nobody has a parent or children yet.
	for (int idx = 1; idx <= nodeCount; ++idx) {
		pHT[idx].weight = (idx <= n) ? weight[idx - 1] : 0;
		pHT[idx].parent = 0;
		pHT[idx].lchild = 0;
		pHT[idx].rchild = 0;
	}

	// Create the internal nodes one after another.
	for (int idx = n + 1; idx <= nodeCount; ++idx) {
		// Pick the two lightest parentless nodes among pHT[1 .. idx-1].
		Select(pHT, idx - 1, lightest, nextLightest);

		// The new node adopts them: lightest on the left, runner-up on the right.
		pHT[idx].lchild = lightest;
		pHT[idx].rchild = nextLightest;
		pHT[lightest].parent = idx;
		pHT[nextLightest].parent = idx;

		// Its weight is the sum of both children.
		pHT[idx].weight = pHT[lightest].weight + pHT[nextLightest].weight;
	}
	return OK;
}

//查找Huffman树节点数组中权值最小的节点
void Select(HuffmanTree& pHT, int i, int& s1, int& s2)
{
    
    
	int minValue = 0x7FFFFFFF;

	//找到最小的一个权值
	for (int j = 1; j <= i; j++) {
    
    
		if (pHT[j].parent == 0 && pHT[j].weight < minValue) {
    
    
			minValue = pHT[j].weight;
			s1 = j;
		}
	}

	minValue = 0x7FFFFFFF;
	//找到倒数第二小的权值
	for (int j = 1; j <= i; j++) {
    
    
		if (j != s1 && pHT[j].parent == 0 && pHT[j].weight < minValue) {
    
    
			minValue = pHT[j].weight;
			s2 = j;
		}
	}
}

// Derives the Huffman code of every leaf by walking the tree without a stack
// and without recursion.
//
// pHC: code table; for every leaf p, pHC[p] is set to a malloc'ed '0'/'1' string.
// pHT: tree with its root at index 511. The weight field of nodes 1..511 is
//      overwritten, because it serves as the visit state during the walk
//      (0 = not visited, 1 = left side done, 2 = right side done).
// Always returns OK.
int HuffmanCoding(HuffmanCode& pHC, HuffmanTree& pHT)
{
	// Branch labels on the way from the root to the current node.
	char path[256] = { '\0' };
	// Number of labels currently stored in path.
	int depth = 0;

	// Clear the visit state of every node.
	for (int k = 1; k < 512; ++k)
		pHT[k].weight = 0;

	int cur = 511;
	while (cur != 0) {
		switch (pHT[cur].weight) {
		case 0:
			// First visit: try to descend to the left.
			pHT[cur].weight = 1;
			if (pHT[cur].lchild != 0) {
				cur = pHT[cur].lchild;
				path[depth++] = '0';
			}
			else if (pHT[cur].rchild == 0) {
				// No children at all: a leaf, so store a copy of the path as its code.
				pHC[cur] = (char*)malloc((depth + 1) * sizeof(char));
				path[depth] = '\0';
				strcpy(pHC[cur], path);
			}
			break;

		case 1:
			// Second visit: try to descend to the right.
			pHT[cur].weight = 2;
			if (pHT[cur].rchild != 0) {
				cur = pHT[cur].rchild;
				path[depth++] = '1';
			}
			break;

		default:
			// Both sides done: reset the state, climb to the parent and
			// drop the last branch label.
			pHT[cur].weight = 0;
			cur = pHT[cur].parent;
			depth--;
			break;
		}
	}
	return OK;
}

// Test helper: prints weight, parent, left child and right child of the
// nodes pHT[1] .. pHT[511], one node per line. Always returns OK.
int TestHufTree(HuffmanTree pHT) {
	std::cout << "哈夫曼树的每个节点信息为：" << std::endl;
	std::cout << "Byte\t\tWeight\tParent\tLchild\tRchild\n";

	for (int idx = 1; idx < 512; ++idx) {
		// Indices up to 99 get an extra tab so that the columns line up.
		const char* closing = (idx <= 99) ? "]\t\t" : "]\t";
		std::cout << "pHT[" << idx << closing
			<< pHT[idx].weight << "\t"
			<< pHT[idx].parent << "\t"
			<< pHT[idx].lchild << "\t"
			<< pHT[idx].rchild << std::endl;
	}
	return OK;
}

// Test helper: walks the subtree rooted at `root` in pre-order and prints,
// for every leaf, the byte value and its Huffman code.
//
// root: index of the subtree root in pHT; values <= 0 denote "no node".
// pHT:  Huffman tree; a node whose lchild and rchild are both 0 is a leaf.
// pHC:  code table; leaf p stands for byte value p - 1 and its code is pHC[p].
void TestHufCode(int root, HuffmanTree& pHT, HuffmanCode& pHC)
{
	// Index 0 means "no node". Index 1 is a real leaf (byte 0x00) and must
	// not be skipped.
	if (root <= 0) return;

	if (pHT[root].lchild == 0 && pHT[root].rchild == 0) {
		// The byte value is root - 1, but the code is stored under the leaf
		// index itself.
		printf("0x%02X\t%s\n", root - 1, pHC[root]);
		return;
	}
	if (pHT[root].lchild)	// visit the left child
		TestHufCode(pHT[root].lchild, pHT, pHC);
	if (pHT[root].rchild)	// visit the right child
		TestHufCode(pHT[root].rchild, pHT, pHC);
}

Huffman.h

#pragma once

// One node of a Huffman tree. Nodes live in an array and refer to each other
// by array index; index 0 stands for "none".
struct HTNode
{
	int weight;	// weight of the node (HuffmanCoding reuses it as a visit flag)
	int parent;	// index of the parent node, 0 if it has none
	int lchild;	// index of the left child, 0 if it has none
	int rchild;	// index of the right child, 0 if it has none
};

// A Huffman tree is handled as a pointer to its node array.
typedef HTNode* HuffmanTree;

// Huffman code table: an array of NUL-terminated '0'/'1' strings.
typedef char** HuffmanCode;

// Prints the number of occurrences of each of the 256 byte values.
void showWeight(int weight[]);

// Builds a Huffman tree with n leaves in pHT[1 .. 2n-1]; weight[k] is the
// weight of leaf k + 1. Returns OK.
int CreateHuffmanTree(HuffmanTree pHT, int weight[], int n);

// Generates the Huffman code of every leaf: pHC[p] receives a malloc'ed
// '0'/'1' string for leaf p. The tree root is expected at index 511, and the
// weight field of nodes 1..511 is overwritten during the traversal.
int HuffmanCoding(HuffmanCode& pHC, HuffmanTree& pHT);

// Finds, among the nodes HT[1 .. i] that have no parent, the node with the
// smallest weight (index returned in s1) and the next smallest (index in s2).
void Select(HuffmanTree& HT, int i, int& s1, int& s2);

// Test helper: prints weight, parent and children of every tree node.
int TestHufTree(HuffmanTree pHT);

// Test helper: walks the tree from `root` in pre-order and prints a byte
// value and a Huffman code for every leaf it reaches.
void TestHufCode(int root, HuffmanTree& pHT, HuffmanCode& pHC);

main.cpp

#include<iostream>
#include"Compress.h"
using namespace std;

// Program entry point: asks for a file name, compresses that file and
// reports whether the compression succeeded.
int main(void) {

	std::cout << "========== Huffman 文件压缩 ==========" << std::endl;

	std::cout << "请输入文件名：";
	char  filename[256] = { 0 };
	// Limit the extraction to the buffer size (255 characters plus NUL) so
	// that an over-long input cannot write past the end of the array.
	std::cin.width(static_cast<std::streamsize>(sizeof(filename)));
	std::cin >> filename;

	// Without a file name there is nothing to compress; report it as a
	// failure. Compress returns 1 on success.
	if (std::cin && Compress(filename) == 1)
		std::cout << "\n文件压缩成功！" << std::endl;
	else
		std::cout << "\n文件压缩失败！" << std::endl;

	return 0;
}

[数据结构与算法综合实验]二叉树与哈夫曼图片压缩