#pragma once
#include<vector>
#include<iostream>
using namespace std;

namespace CloseHash
{
	// A table slot is always in exactly one of three states.
	enum State
	{
		EMPTY = 0,   // slot is unoccupied
		EXIST = 1,   // slot holds an element
		DELETE = 2,  // slot's element has been deleted
	};
}

（2）定义HashData

哈希数据应包含两个成员：数据和状态

	// One slot of the closed-hashing table: the stored key/value pair together
	// with the slot's state. A default-constructed slot starts out EMPTY.
	template<class K, class V>
	struct HashData
	{
		pair<K, V> _kv;// the stored data
		State _state = CloseHash::State::EMPTY;// state of this slot
	};

（3）哈希表

哈希表包含两个成员：哈希数据、存储的有效数据的个数

模板有3个参数K、V、HashFunc。

①由于不知道key是K还是pair，所以需要定义两个模板参数K、V来包含key是K或pair的两种情况

②由于不知道key的数据类型是int还是string、pair、struct，计算key的映射位置时需要取模，但是只能对int型取模，string、struct、pair无法取模，HashFunc作为仿函数，它的实例可以分别应对这些类型的取模。

    // Closed-hashing table keyed by K and storing pair<K, V>.
    // HashFunc is a functor type; the member functions shown elsewhere use
    // an instance of it to turn a key into a value that can be taken modulo
    // the table size.
    template<class K, class V, class HashFunc>
	class HashTable
	{
    private:
		vector<HashData<K, V>> _table;// the slots of the hash table
		size_t _n = 0;// number of valid elements currently stored
	};

（4）查找

①无论传给哈希表的数据是K还是pair，查找时，都需要用K做key来进行查找

②计算元素位置

③如果当前位置元素为key，那么就返回该元素，否则可能发生了冲突，继续向后探测

	public:
        //用K查找
		// Looks up `key` by linear probing.
		// Returns a pointer to the slot holding the key (so the caller can
		// modify the stored value), or nullptr when the key is not present.
		HashData<K,V>* Find(const K& key)
		{
			if (_table.size() == 0)
			{
				return nullptr;
			}

			HashFunc hf;// functor that maps the key to an integer
			const size_t start = hf(key) % _table.size();// division-remainder method

			// Probe each slot at most once. Erase leaves DELETE markers behind,
			// so a table can end up with no EMPTY slot at all; an unbounded
			// "until EMPTY" loop would then never terminate.
			for (size_t i = 0; i < _table.size(); ++i)
			{
				const size_t index = (start + i) % _table.size();// linear probing
				if (_table[index]._state == EMPTY)
				{
					break;// an unused slot ends the probe sequence
				}

				if (_table[index]._state == EXIST
					&& _table[index]._kv.first == key)
				{
					return &_table[index];
				}
			}

			return nullptr;
		}

（5）插入

①先查找key是否已在表中，如果已存在则插入失败

②第一次插入时，哈希表的大小是0，所以第一次插入时要先给表开空间

③还需要判断负载因子是否>0.7，如果超过0.7，就要开一个容量更大的新表，并把旧表的数据都重新插入到新表上

④当计算的位置有数据时，就向后探测，直到探测到空位置即可存入数据

		bool Insert(const pair<K, V>& kv)
		{
			HashData<K, V>* ret = Find(kv.first);
			if (ret)
			{
				return false;
			}

			if (_table.size() == 0)
			{
				_table.resize(10);
			}
			else if ((double)_n / (double)_table.size() > 0.7)//负载因子 > 0.7， 需要增容
			{
				HashTable<K, V, HashFunc> newHashTable;
				newHashTable._table.resize(2 * _table.size());

				for (auto& e : _table)
				{
					if (e._state == EXIST)
					{
						newHashTable.Insert(e._kv);
					}
				}

				_table.swap(newHashTable._table);
			}

			HashFunc hf;
			size_t start = hf(kv.first) % _table.size();
			size_t index = start;

			//探测后面的位置---线性探测
			size_t i = 1;
			while (_table[index]._state == EXIST)
			{
				//状态为State时，就发生了冲突，需要向后找空位置
				index = start + i;
				index %= _table.size();
				++i;
			}

			//找到空位置就存入数据
			_table[index]._kv = kv;
			_table[index]._state = EXIST;
			++_n;

			return true;
		}
		
		//用K查找
		// Looks up `key` by linear probing.
		// Returns a pointer to the slot holding the key (so the caller can
		// modify the stored value), or nullptr when the key is not present.
		HashData<K, V>* Find(const K& key)
		{
			if (_table.size() == 0)
			{
				return nullptr;
			}

			HashFunc hf;// functor that maps the key to an integer
			const size_t start = hf(key) % _table.size();// division-remainder method

			// Probe each slot at most once. Erase leaves DELETE markers behind,
			// so a table can end up with no EMPTY slot at all; an unbounded
			// "until EMPTY" loop would then never terminate.
			for (size_t i = 0; i < _table.size(); ++i)
			{
				const size_t index = (start + i) % _table.size();// linear probing
				if (_table[index]._state == EMPTY)
				{
					break;// an unused slot ends the probe sequence
				}

				if (_table[index]._state == EXIST
					&& _table[index]._kv.first == key)
				{
					return &_table[index];
				}
			}

			return nullptr;
		}

（6）删除

利用假删除，将状态标记为删除即可：

		//删除
		// Removes `key` by marking its slot DELETE (the slot's data is left in
		// place). Returns true when the key was found and removed, false when
		// it was not present.
		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret == nullptr)
			{
				return false;
			}

			ret->_state = DELETE;
			--_n;

			return true;
		}

（7）仿函数

仿函数的目的是为了让不同类型的数据能够取模，方便计算数据位置

类的仿函数模板，默认支持int：

    // Default hash functor: yields the key itself converted to size_t, so K
    // must be implicitly convertible to size_t.
    template<class K>
	struct Hash
	{
		size_t operator()(const K& key)
		{
			const size_t code = key;
			return code;
		}
	};

string类型的key不能使用上述仿函数类模板，因为字符串不能直接转换成整型来取模。string类型的仿函数计算出的数值要尽量不重复，否则发生冲突的概率会比较高

	// Hash functor for strings: folds every character into an accumulator,
	// multiplying by 131 after each one (BKDR-style hashing).
	struct StringHashFunc
	{
		size_t operator()(const string& s)
		{
			size_t acc = 0;
			for (size_t pos = 0; pos < s.size(); ++pos)
			{
				// Add the character, then scale the running total by 131.
				acc = (acc + s[pos]) * 131;
			}
			return acc;
		}
	};

任意类型（pair、结构体）都可以做key，key尽量选择不容易重复的成员，并配一个把该类型对象转换成整型的仿函数。比如一个类型做map/set的key，那就要求该类型能支持比较大小。又比如一个类型做unordered_map/unordered_set的key，那就要求该类型能支持转换成整型+相等比较。

（8）完整代码段

HashTable.h

#pragma once
#include<vector>
#include<iostream>
using namespace std;

namespace CloseHash
{
	// State of one slot in the table.
	enum State
	{
		EMPTY,   // unoccupied; ends a probe sequence
		EXIST,   // holds a live key/value pair
		DELETE,  // element was erased; probing continues past this slot
	};

	// One slot of the table: the stored pair plus the slot's state.
	template<class K, class V>
	struct HashData
	{
		std::pair<K, V> _kv;
		State _state = EMPTY;
	};

	// Default hash functor: the key itself, converted to size_t.
	template<class K>
	struct Hash
	{
		size_t operator()(const K& key) const
		{
			return key;
		}
	};

	// Specialization for the commonly used std::string key: every character
	// is added to the accumulator, which is then multiplied by 131.
	template<>
	struct Hash<std::string>
	{
		size_t operator()(const std::string& s) const
		{
			size_t value = 0;
			for (auto e : s)
			{
				value += e;
				value *= 131;
			}
			return value;
		}
	};

	// Hash table using closed hashing (open addressing) with linear probing.
	// HashFunc converts a key to size_t; the slot is that value modulo the
	// current table size.
	template<class K, class V, class HashFunc = Hash<K>>
	class HashTable
	{
	public:
		// Inserts `kv` unless its key is already present.
		// Returns true when the pair was stored, false on a duplicate key.
		bool Insert(const std::pair<K, V>& kv)
		{
			if (Find(kv.first) != nullptr)
			{
				return false;
			}

			if (_table.empty())
			{
				// First insertion: give the table its initial slots.
				_table.resize(10);
			}
			else if (static_cast<double>(_n) / static_cast<double>(_table.size()) > 0.7)
			{
				// Load factor above 0.7: re-insert every live element into a
				// table twice as large, then adopt that table's storage.
				HashTable<K, V, HashFunc> newHashTable;
				newHashTable._table.resize(2 * _table.size());

				for (auto& e : _table)
				{
					if (e._state == EXIST)
					{
						newHashTable.Insert(e._kv);
					}
				}

				_table.swap(newHashTable._table);
			}

			HashFunc hf;
			const size_t start = hf(kv.first) % _table.size();
			size_t index = start;

			// Linear probing: a slot in state EXIST is a collision. The load
			// factor check above guarantees that a non-EXIST slot exists.
			size_t i = 1;
			while (_table[index]._state == EXIST)
			{
				index = (start + i) % _table.size();
				++i;
			}

			_table[index]._kv = kv;
			_table[index]._state = EXIST;
			++_n;

			return true;
		}

		// Looks up `key` by linear probing.
		// Returns a pointer to the slot holding the key (so the caller can
		// modify the stored value), or nullptr when the key is not present.
		HashData<K, V>* Find(const K& key)
		{
			if (_table.empty())
			{
				return nullptr;
			}

			HashFunc hf;
			const size_t start = hf(key) % _table.size();

			// Probe each slot at most once. Erase leaves DELETE markers behind,
			// so a table can end up with no EMPTY slot at all; an unbounded
			// "until EMPTY" loop would then never terminate.
			for (size_t i = 0; i < _table.size(); ++i)
			{
				const size_t index = (start + i) % _table.size();
				if (_table[index]._state == EMPTY)
				{
					break;
				}

				if (_table[index]._state == EXIST
					&& _table[index]._kv.first == key)
				{
					return &_table[index];
				}
			}

			return nullptr;
		}

		// Removes `key` by marking its slot DELETE.
		// Returns true when the key was found and removed, false otherwise.
		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret == nullptr)
			{
				return false;
			}

			ret->_state = DELETE;
			--_n;

			return true;
		}
	private:
		std::vector<HashData<K, V>> _table;// the slots of the hash table
		size_t _n = 0;// number of live elements currently stored
	};

	// Smoke test: insert a handful of int keys.
	inline void test_CloseHashInt()
	{
		int a[] = { 6,201,35,76,89,2 };
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(std::make_pair(e, e));
		}
	}

	// Smoke test: count occurrences of each string, using Find to bump the
	// count of a key that is already stored.
	inline void test_CloseHashString()
	{
		std::string a[] = { "篮球","足球","篮球","篮球","羽毛球","羽毛球","乒乓球","羽毛球" };
		HashTable<std::string, int> ht;
		for (const auto& e : a)
		{
			auto ret = ht.Find(e);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				ht.Insert(std::make_pair(e, 1));
			}
		}
	}
}

Test.cpp

#define  _CRT_SECURE_NO_WARNINGS  1
#include "HashTable.h"

int main()
{
	CloseHash::test_CloseHashInt();
	CloseHash::test_CloseHashString();

	return 0;
}

三、用开散列解决哈希冲突

1.开散列介绍

开散列也叫拉链法，先对所有key用散列函数计算散列地址，具有相同地址的key归于同一个集合，每个这样的集合称为一个桶，桶中的元素通过单链表链接起来，各链表的头结点存储在哈希表中。

因此，开散列的每个桶中存放的都是哈希冲突的元素，负载因子较低。当某个桶超过一定长度时，可以把这个冲突过多的桶换成红黑树。实际中哈希桶的结构更实用，因为哈希桶空间利用率高，并且在极端情况下还有解决方案。

2.哈希桶的实现

哈希桶作为指针数组，数组的每个元素是一个结点的指针，链表不需要带哨兵位，且头插的效率比较高。

（1）哈希仿函数

在闭散列中，已经实现了Hash仿函数，用来获取哈希表中的元素的key，方便后续计算映射位置

#pragma once
#include <vector>
#include <iostream>
using namespace std;
namespace OpenHash
{
	// Default hash functor: yields the key itself converted to size_t, so K
	// must be implicitly convertible to size_t.
	template<class K>
	struct Hash
	{
		size_t operator()(const K& key)
		{
			const size_t code = key;
			return code;
		}
	};
}

模板特化：string元素使用频率较高，进行模板特化

	// 特化
	// Specialization of Hash for string keys: folds every character into an
	// accumulator, multiplying by 131 after each one (BKDR-style hashing).
	template<>
	struct Hash < string >
	{
		size_t operator()(const string& s)
		{
			size_t acc = 0;
			for (size_t pos = 0; pos < s.size(); ++pos)
			{
				// Add the character, then scale the running total by 131.
				acc = (acc + s[pos]) * 131;
			}

			return acc;
		}
	};

（2）哈希桶节点

哈希桶只需要2个成员：数据、下一个桶指针

	// Node of a bucket's singly linked list: the stored key/value pair and a
	// pointer to the next node in the same bucket. Parameterized on K and V
	// (and exposing `_kv`) because the hash table instantiates HashNode<K, V>
	// and reads `_kv.first` from each node.
	template<class K, class V>
	struct HashNode
	{
		std::pair<K, V> _kv;
		HashNode<K, V>* _next;

		// A new node is not linked to anything until it is inserted.
		HashNode(const std::pair<K, V>& kv)
			:_kv(kv)
			, _next(nullptr)
		{}
	};

（3）哈希表

哈希表有两个成员：哈希表、有效数据的个数

	// Open-hashing (separate chaining) table: an array of bucket head
	// pointers plus the number of stored elements.
	template<class K,class V,class HashFunc=Hash<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	private:
		vector<Node*> _table;// bucket head pointers
		// Must start at 0: Insert compares it with _table.size() to decide
		// when to grow, so an indeterminate value would be undefined behavior.
		size_t _n = 0;// number of stored elements
	};

（4）查找

先计算key在哈希表中的位置，然后再在该位置的哈希桶中遍历查找：

		//查找
		// Looks up `key` in its bucket.
		// Returns the node holding the key, or nullptr when it is not present
		// (including when the table has no buckets yet).
		Node* Find(const K& key)
		{
			// No buckets yet: nothing can be stored. Return a null pointer;
			// `false` is not a valid null pointer constant.
			if (_table.size() == 0)
			{
				return nullptr;
			}

			HashFunc hf;
			size_t index = hf(key) % _table.size();// bucket that the key maps to

			// Walk the bucket's linked list.
			for (Node* cur = _table[index]; cur != nullptr; cur = cur->_next)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
			}

			return nullptr;
		}

（5）插入

①查找key在不在哈希表中

②不在就要先判断哈希表是否满了

③若哈希表满了就要重新开一个新的哈希表，将旧表数据全部头插到新表中

④插入数据

		//插入
        // Inserts `kv` unless its key is already present.
		// Returns true when a node was added, false on a duplicate key.
		bool Insert(const pair<K, V>& kv)
		{
			// Find takes the key, not the whole pair.
			if (Find(kv.first))
			{
				return false;
			}

			// A load factor of 1 means the table is full: allocate a larger
			// bucket array and recompute every node's bucket.
			HashFunc hf;
			if (_n == _table.size())
			{
				vector<Node*> newHashTable;
				newHashTable.resize(GetNextPrime(_table.size()));

				// Relink the existing nodes into the new array rather than
				// copying them; a node's bucket may change with the new size.
				for (size_t i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t index = hf(cur->_kv.first) % newHashTable.size();

						// Head insertion into the new bucket.
						cur->_next = newHashTable[index];
						newHashTable[index] = cur;
						cur = next;
					}

					_table[i] = nullptr;
				}
				_table.swap(newHashTable);
			}

			size_t index = hf(kv.first) % _table.size();
			Node* newNode = new Node(kv);

			// Head insertion into the target bucket.
			newNode->_next = _table[index];
			_table[index] = newNode;
			_n++;

			return true;
		}

（6）删除

①计算key在表中的位置

②判断要删除的数据是不是该位置哈希桶的第一个节点：如果是，就让桶的头指针指向第二个节点；否则让该节点的前一个节点指向它的下一个节点

		//删除
		bool Erase(const K& key)
		{
			size_t index = hf(key) % _table.size();
			Node* prev = nullptr;
			Node* cur = _table[index];

			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (_table[index] == cur)//要删除的key就是该位置的第一个桶
					{
						_table[index] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					
					--_n;
					delete cur;
					return true;

				}

				prev = cur;
				cur = cur->next;

				return false;
			}
		}

（7）完整代码段

HashTable.h

namespace OpenHash
{
	// Default hash functor: the key itself, converted to size_t.
	template<class K>
	struct Hash
	{
		size_t operator()(const K& key) const
		{
			return key;
		}
	};

	// Specialization for std::string keys: every character is added to the
	// accumulator, which is then multiplied by 131 (BKDR-style hashing).
	template<>
	struct Hash<std::string>
	{
		size_t operator()(const std::string& s) const
		{
			size_t value = 0;
			for (auto e : s)
			{
				value += e;
				value *= 131;
			}

			return value;
		}
	};

	// Node of a bucket's singly linked list.
	template<class K, class V>
	struct HashNode
	{
		std::pair<K, V> _kv;
		HashNode<K, V>* _next;

		HashNode(const std::pair<K, V>& kv)
			:_kv(kv)
			, _next(nullptr)
		{}
	};

	// Hash table using open hashing (separate chaining). Each bucket is a
	// singly linked list of heap-allocated nodes owned by the table.
	template<class K,class V,class HashFunc=Hash<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		HashTable() = default;

		// Deep copy: the table owns its nodes, so a member-wise copy would
		// leave two tables freeing the same nodes.
		HashTable(const HashTable& other)
			: _table(other._table.size(), nullptr)
		{
			try
			{
				// Same bucket count, so every node stays in the same bucket.
				for (size_t i = 0; i < other._table.size(); ++i)
				{
					for (const Node* cur = other._table[i]; cur != nullptr; cur = cur->_next)
					{
						Node* copy = new Node(cur->_kv);
						copy->_next = _table[i];
						_table[i] = copy;
						++_n;
					}
				}
			}
			catch (...)
			{
				// The destructor does not run for a partially constructed
				// object, so release what was allocated before rethrowing.
				Clear();
				throw;
			}
		}

		// Copy-and-swap assignment; `other` is a copy that takes the old
		// contents with it when it is destroyed.
		HashTable& operator=(HashTable other)
		{
			_table.swap(other._table);
			std::swap(_n, other._n);
			return *this;
		}

		~HashTable()
		{
			Clear();
		}

		// Returns the first prime in the built-in list that is greater than
		// `prime`, or the largest listed prime when none is greater.
		size_t GetNextPrime(size_t prime)
		{
			const size_t PRIMECOUNT = 28;
			static const size_t primeList[PRIMECOUNT] =
			{
				53ul, 97ul, 193ul, 389ul, 769ul,
				1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
				49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
				1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
				50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
				1610612741ul, 3221225473ul, 4294967291ul
			};

			for (size_t i = 0; i < PRIMECOUNT; i++)
			{
				if (primeList[i] > prime)
				{
					return primeList[i];
				}
			}

			// Indexing with the loop counter here would read past the array.
			return primeList[PRIMECOUNT - 1];
		}

		// Inserts `kv` unless its key is already present.
		// Returns true when a node was added, false on a duplicate key.
		bool Insert(const std::pair<K, V>& kv)
		{
			// Find takes the key, not the whole pair.
			if (Find(kv.first))
			{
				return false;
			}

			// A load factor of 1 means the table is full: allocate a larger
			// bucket array and recompute every node's bucket.
			HashFunc hf;
			if (_n == _table.size())
			{
				std::vector<Node*> newHashTable(GetNextPrime(_table.size()), nullptr);

				// Relink the existing nodes into the new array rather than
				// copying them; a node's bucket may change with the new size.
				for (size_t i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t index = hf(cur->_kv.first) % newHashTable.size();

						// Head insertion into the new bucket.
						cur->_next = newHashTable[index];
						newHashTable[index] = cur;
						cur = next;
					}

					_table[i] = nullptr;
				}
				_table.swap(newHashTable);
			}

			size_t index = hf(kv.first) % _table.size();
			Node* newNode = new Node(kv);

			// Head insertion into the target bucket.
			newNode->_next = _table[index];
			_table[index] = newNode;
			_n++;

			return true;
		}

		// Looks up `key` in its bucket.
		// Returns the node holding the key, or nullptr when it is not present
		// (including when the table has no buckets yet).
		Node* Find(const K& key)
		{
			if (_table.empty())
			{
				return nullptr;
			}

			HashFunc hf;
			size_t index = hf(key) % _table.size();

			for (Node* cur = _table[index]; cur != nullptr; cur = cur->_next)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
			}

			return nullptr;
		}

		// Removes the node holding `key` from its bucket and frees it.
		// Returns true when a node was removed, false when the key is not
		// present (including when the table has no buckets yet).
		bool Erase(const K& key)
		{
			// Guard against modulo by zero on a table without buckets.
			if (_table.empty())
			{
				return false;
			}

			HashFunc hf;
			size_t index = hf(key) % _table.size();
			Node* prev = nullptr;
			Node* cur = _table[index];

			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr)// the key is the first node of the bucket
					{
						_table[index] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					--_n;
					delete cur;
					return true;
				}

				prev = cur;
				cur = cur->_next;
			}

			// The whole bucket was scanned without a match.
			return false;
		}
	private:
		// Frees every node and resets the element count; the bucket array
		// keeps its size.
		void Clear()
		{
			for (Node*& head : _table)
			{
				while (head)
				{
					Node* next = head->_next;
					delete head;
					head = next;
				}
			}
			_n = 0;
		}

		std::vector<Node*> _table;// bucket head pointers
		size_t _n = 0;// number of stored elements
	};
}

【C++】-- 哈希算法

一、哈希概念

1.插入和查找

2.哈希表

3.常见的哈希函数

（1）直接定址法

（2）除留余数法

二、用闭散列解决哈希冲突

1.线性探测法介绍

2.线性探测的实现

（1）状态

（2）定义HashData

（3）哈希表

（4）查找

（5）插入

（6）删除

（7）仿函数

（8）完整代码段

三、用开散列解决哈希冲突

1.开散列介绍

2.哈希桶的实现

（1）哈希仿函数

（2）哈希桶节点

（3）哈希表

（4）查找

（5）插入

（6）删除

（7）完整代码段

猜你喜欢