C++：哈希，unordered_map和unordered_set

#include<iostream>
#include <set>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <time.h>
using namespace std;

void test_set()
{
	unordered_set<int> s;
	//set<int> s;
	s.insert(2);
	s.insert(3);
	s.insert(1);
	s.insert(2);
	s.insert(5);

	//unordered_set<int>::iterator it = s.begin();
	auto it = s.begin();
	while (it != s.end())
	{
		cout << *it << " ";
		++it;
	}
	cout << endl;

	for (auto e : s)
	{
		cout << e << " ";
	}
	cout << endl;
}

void test_op()    //用于记录插入1000w个数两个容器花费时间的函数
{
	int n = 10000000;
	vector<int> v;
	v.reserve(n);
	srand(time(0));
	for (int i = 0; i < n; ++i)
	{
		//v.push_back(i);
		//v.push_back(rand()+i);  // 重复少
		v.push_back(rand());  // 重复多
	}

	size_t begin1 = clock();
	set<int> s;
	for (auto e : v)
	{
		s.insert(e);
	}
	size_t end1 = clock();

	size_t begin2 = clock();
	unordered_set<int> us;
	for (auto e : v)
	{
		us.insert(e);
	}
	size_t end2 = clock();

	cout << s.size() << endl;

	cout << "set insert:" << end1 - begin1 << endl;
	cout << "unordered_set insert:" << end2 - begin2 << endl;


	size_t begin3 = clock();
	for (auto e : v)
	{
		s.find(e);
	}
	size_t end3 = clock();

	size_t begin4 = clock();
	for (auto e : v)
	{
		us.find(e);
	}
	size_t end4 = clock();
	cout << "set find:" << end3 - begin3 << endl;
	cout << "unordered_set find:" << end4 - begin4 << endl;

	
	size_t begin5 = clock();
	for (auto e : v)
	{
		s.erase(e);
	}
	size_t end5 = clock();

	size_t begin6 = clock();
	for (auto e : v)
	{
		us.erase(e);
	}
	size_t end6 = clock();
	cout << "set erase:" << end5 - begin5 << endl;
	cout << "unordered_set erase:" << end6 - begin6 << endl;
}

void test_map()
{
	unordered_map<string, string> dict;
	dict.insert(make_pair("sort", "排序"));
	dict.insert(make_pair("left", "左边"));
	dict.insert(make_pair("left", "剩余"));
	dict["string"];
	dict["left"] = "剩余";
	dict["string"] = "字符串";
}

int main()
{
	//test_set();
	//test_op();
	test_map();

	return 0;
}

二.哈希

哈希（散列）-映射：存储关键字跟存储位置建立关联关系

值和存储位置——> 映射关系 ——> 查找按映射关系去找

哈希是一种用来进行高效查找的数据结构，查找的时间复杂度平均为O(1)

采用哈希方式解决问题时，必须使用哈希函数

哈希查找的时间复杂度不一定是O(1)（因为存在哈希冲突，一般基本都是O(1)）

哈希是以牺牲空间为代价，提高查询的效率（采用哈希处理时，一般所需空间都会比元素个数多，否则产生冲突的概率就比较大，影响哈希的性能）

直接建立映射关系问题:
1、数据范围分布很广、不集中（除留余数法解决）
2、key的数据不是整数,是字符串怎么办?是自定义类型对象怎么办?

1. 直接定址法--(数分布集中常用)

取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B

优点：简单、均匀

缺点：需要事先知道关键字的分布情况

使用场景：适合查找比较小且集中连续的情况

2. 除留余数法--(数分布不集中,均匀常用)

key %表大小——>映射的位置

下面都假设这个表大小是10

除留余数法会有个问题，就是哈希冲突

3.哈希冲突

不同关键字通过相同哈希哈数计算出相同的哈希地址，该种现象称为哈希冲突 或哈希碰撞。

例如：除留余数法中，20%表大小10=0，把20放在下标0处，如果有30,50呢，30,50%10也得放在下标0处，所以要解决哈希冲突，解决哈希冲突两种常见的方法是：闭散列和开散列。

（2）闭散列

闭散列：也叫开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有

空位置，那么可以把key存放到冲突位置中的“下一个” 空位置中去。那如何寻找下一个空位置

呢？闭散列分为线性探测和二次探测

线性探测：

从发生冲突的位置开始，依次向后探测，直到寻找到下一个空位置为止。

hash(key)%N + i（i= 0,1,2,3...）：比如10%N(N是表大小10)=0，因为下标0已经放了20，则再加i=1，10%10+1=1，就把10放到了下标是1的地方。30：放30就是30%10+2=2，把30放到了下标是2的地方

缺点:我占你的，你占他的，拥堵

添加状态

比如查找50时，一直找到空就停止，但是如果在50之前有删除的数据，就会在删除数据的位置停下，这样就找不到50了，所以添加个状态用于区分空和删除的位置。遇到空EMPTY就停，遇到删除DELETE和存在 EXITS就不停

enum State   
{
	EMPTY,
	EXITS,
	DELETE
};

二次探测

线性探测的缺陷是产生冲突的数据堆积在一块，这与其找下一个空位置有关系，因为找空位

置的方式就是挨着往后逐个去找，因此二次探测为了避免该问题，改成每次跳 i² 个

hash(key)%N + i² （i= 0,1,2,3...）

比如10%N(N是表大小10)=0，因为下标0已经放了20，则再加 i² =1，10%10+1=1，就把10放到了下标是1的地方。30：放30就是30%10+2²=4，把30放到了下标是4的地方

对上面方法的优化:不那么拥堵

starti %= _tables.size();还是.capacity() ？

_tables.size()是能存数据的位置个数，_tables.capacity()是总容量大小（包括没有初始化的空间）类似resize和reserve的区别，starti %= _tables.size(); 插入数据只能插在能插入的位置，因为_tables[hashi] 的operator[] 会自动检查这个位置是否有值还是没有值的随机数，有值就可以插入。

4.负载因子

负载因子（散列表的载荷因子α） = 填入表中的元素个数 / 哈希表的长度

由于表长是定值，α与“填入表中的元素个数”成正比，所以，α越大，表明填入表中的元素越多，产生冲突的可能性就越大:反之，α越小，标明填入表中的元素越少，产生冲突的可能性就越小。

哈希表什么情况下进行扩容？如何扩容？

负载因子大于0.7就扩容。

5.哈希冲突的处理方法和哈希函数区分

哈希函数作用是：建立元素与其存储位置之前的对应关系的，在存储元素时，先通过哈希函数计算元素在哈希表格中的存储位置，然后存储元素。好的哈希函数可以减少冲突的概率，但是不能绝对避免哈希函数，万一发生哈希冲突，得需要借助哈希冲突处理方法来解决。

常见的哈希函数有：直接定址法、除留余数法、平方取中法、随机数法、数字分析法、叠加法等

常见哈希冲突处理：闭散列（线性探测、二次探测）、开散列(链地址法)、多次散列

常见哈希函数 ：

1. 直接定址法--(常用)

取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B

优点：简单、均匀

缺点：需要事先知道关键字的分布情况

使用场景：适合查找比较小且连续的情况

2. 除留余数法--(常用)

设散列表中允许的地址数为m，取一个不大于m，但最接近或者等于m的质数p作为除数，

按照哈希函数：Hash(key) = key% p(p<=m),将关键码转换成哈希地址

3. 平方取中法--(了解)

假设关键字为1234，对它平方就是1522756，抽取中间的3位227作为哈希地址；

再比如关键字为4321，对它平方就是18671041，抽取中间的3位671(或710)作为哈希地址

平方取中法比较适合：不知道关键字的分布，而位数又不是很大的情况

4. 折叠法--(了解)

折叠法是将关键字从左到右分割成位数相等的几部分(最后一部分位数可以短些)，然后将这

几部分叠加求和，并按散列表表长，取后几位作为散列地址。

折叠法适合事先不需要知道关键字的分布，适合关键字位数比较多的情况

5. 随机数法--(了解)

选择一个随机函数，取关键字的随机函数值为它的哈希地址，即H(key) = random(key),其中

random为随机数函数。

通常应用于关键字长度不等时采用此法

6. 数学分析法--(了解)

设有n个d位数，每一位可能有r种不同的符号，这r种不同的符号在各位上出现的频率不一定

相同，可能在某些位上分布比较均匀，每种符号出现的机会均等，在某些位上分布不均匀只

有某几种符号经常出现。可根据散列表的大小，选择其中各种符号分布均匀的若干位作为散

列地址。

阶段一：只有插入的哈希

#pragma once
#include<vector>

enum State    
{
	EMPTY,
	EXITS,
	DELETE
};

// 休息21:16继续
template<class K, class V>
struct HashData
{
	pair<K, V> _kv;
	State _state;
};

template<class K, class V>
class HashTable
{
public:
	bool Insert(const pair<K, V>& kv)
	{
// 负载因子到0.7及以上，就扩容。这里算 负载因子>0.7,因为是小数，前面可能需要类型转换，否则除出来是0，我们不如把 负载因子*10>7
		if (_tables.size() == 0 || _n * 10 / _tables.size() >= 7)
		{    
			size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
			// 扩容以后，需要重新映射
			HashTable<K, V> newHT;
			newHT._tables.resize(newSize);
			// 遍历旧表，插入newHT
			for (auto& e : _tables)
			{
				if (e._state == EXITS)
				{
					newHT.Insert(e._kv);
				}
			}
			newHT._tables.swap(_tables);
		}

		size_t starti = kv.first;
		starti %= _tables.size();

		size_t hashi = starti;
		size_t i = 1;
		// 线性探测/二次探测
		while (_tables[hashi]._state == EXITS)
		{
			hashi = starti + i;
			++i;
			hashi %= _tables.size();
		}

		_tables[hashi]._kv = kv;
		_tables[hashi]._state = EXITS;
		_n++;
	}
	
	HashData* Find(const K& key);
	bool Erase(const K& key);

private:
	vector<HashData> _tables;
	size_t _n = 0; // 存储关键字个数
};

阶段二：完善哈希表，雏形哈希桶

1.当key是string或其他类型，如何映射？

key是string或者自定义类型时，需要转成整数再映射进哈希表，这个转成整数的方法有多样，比如：string类型转成整数方法是BKDR法，hash乘一个值131再加字符串的ASCII值，这样能减少冲突。如果仅仅是通过整个字符串的ASCII值来来映射，比如"abc"和"bac"就一样，一样就是哈希冲突，哈希冲突也没关系，我们有处理哈希冲突的机制：把映射到同一位置的值像下一个位置放（线性探测）。类似于先放20后再放10，映射在大小是10的哈希表中，都放在0处，哈希冲突了，就可以把10放在20后面的空位置上。

    template<>
	struct DefaultHash<string>
	{
		size_t operator()(const string& key)
		{             //key是string类型用仿函数DefaultHash转成整数
			//转换方法：BKDR ，数学研究出来的
			size_t hash = 0;
			for (auto ch : key) 
			{    
				hash = hash * 131 + ch;    //hash乘一个值131再加字符串的ASCII
			}

			return hash;
		}
	};

2.仿函数中的特化

这样模拟实现unorodered_map时，直接定义 unorodered_map<int,int> a; 时不用传仿函数是因为template<class K, class V, class HashFunc = DefaultHash<K>> 仿函数给了缺省参数，不传默认是基础的类struct DefaultHash，直接定义 unorodered_map<string,string> b; 时也可以不用传仿函数是因为类模板特化，key是string就会使用特化版本的struct DefaultHash<string>

    template<class K>
	struct DefaultHash
	{
		size_t operator()(const K& key)
		{
			return (size_t)key;
		}
	};


	template<>
	struct DefaultHash<string>
	{
		size_t operator()(const string& key)
		{             //key是string类型用仿函数DefaultHash转成整数
			//转换方法：BKDR ，数学研究出来的
			size_t hash = 0;
			for (auto ch : key) 
			{    
				hash = hash * 131 + ch;    //hash乘一个值131再加字符串的ASCII
			}

			return hash;
		}
	};

template<class K, class V, class HashFunc = DefaultHash<K>>
	class HashTable
	{
		typedef HashData<K, V> Data;
	public:
    ……

3.开散列（哈希桶）

（1）哈希桶/开散列/链地址法/开链法概念

开散列法又叫哈希桶/链地址法/开链法，数据不存在表中，表里面存储一个链表指针，冲突的数据链表形式挂起来。哈希桶用单链表，不进行插入删除操作就不用双向链表，仅头插就可以，而且单链表比双向链表少存一个指针，能省则省。

vector<Node*> 数组每个位置存的是链表第一个值的指针

#pragma once
#include<vector>

namespace CloseHash
{

	enum State
	{
		EMPTY,
		EXITS,
		DELETE
	};


	template<class K, class V>
	struct HashData
	{
		pair<K, V> _kv;
		State _state = EMPTY;
	};

	template<class K>
	struct DefaultHash
	{
		size_t operator()(const K& key)
		{
			return (size_t)key;
		}
	};


	template<>
	struct DefaultHash<string>
	{
		size_t operator()(const string& key)
		{             //key是string类型用仿函数DefaultHash转成整数
			//转换方法：BKDR ，数学研究出来的
			size_t hash = 0;
			for (auto ch : key) 
			{    
				hash = hash * 131 + ch;    //hash乘一个值131再加字符串的ASCII
			}

			return hash;
		}
	};

	//struct StringHash
	//{
	//	// "abcd"
	//	// "aa"
	//	size_t operator()(const string& key)
	//	{
	//		//return key[0];     可以
	//		//return (size_t)&key; 不行
	//
	//		/*size_t hash = 0;
	//		for (auto ch : key)
	//		{
	//			hash += ch;
	//		}
	//
	//		return hash;*/
	//
	//		// BKDR
	//		size_t hash = 0;
	//		for (auto ch : key)
	//		{
	//			hash = hash* 131 + ch;
	//		}
	//
	//		return hash;
	//	}
	//};

	template<class K, class V, class HashFunc = DefaultHash<K>>
	class HashTable
	{
		typedef HashData<K, V> Data;
	public:
		bool Insert(const pair<K, V>& kv)
		{
			if (Find(kv.first))    //去冗余 易错点1，容易忘写
			{
				return false;
			}

			// 负载因子到0.7及以上，就扩容
			if (_tables.size() == 0 || _n * 10 / _tables.size() >= 7)
			{
				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				// 扩容以后，需要重新映射
				HashTable<K, V, HashFunc> newHT;
				newHT._tables.resize(newSize);
				// 遍历旧表，插入newHT
				for (auto& e : _tables)
				{
					if (e._state == EXITS)    //易错点2，容易忘写
					{
						newHT.Insert(e._kv);
					}
				}
				newHT._tables.swap(_tables);
			}

			HashFunc hf;
			size_t starti = hf(kv.first);
			starti %= _tables.size();

			size_t hashi = starti;
			size_t i = 1;
			// 线性探测/二次探测
			while (_tables[hashi]._state == EXITS)
			{
				hashi = starti + i;
				++i;
				hashi %= _tables.size();
			}

			_tables[hashi]._kv = kv;
			_tables[hashi]._state = EXITS;
			_n++;

			return true;
		}

		Data* Find(const K& key)
		{
			if (_tables.size() == 0)  //哈希表还没数据时不能查找
			{
				return nullptr;
			}

			HashFunc hf;
			size_t starti = hf(key);  //不同类型的key通过对应的仿函数转成整数-
			starti %= _tables.size();//-比如key是string类型，就-        
			size_t hashi = starti;           //-用仿函数DefaultHash转成整形
			size_t i = 1;
			while (_tables[hashi]._state != EMPTY)
			{    //下面如果状态是DELETE就无法被查找到
				if (_tables[hashi]._state != DELETE && _tables[hashi]._kv.first == key)
				{
					return &_tables[hashi];
				}

				hashi = starti + i;
				++i;
				hashi %= _tables.size();
			}

			return nullptr;
		}

		bool Erase(const K& key)
		{
			Data* ret = Find(key);
			if (ret)
			{
				ret->_state = DELETE;
				--_n;
				return true;
			}
			else
			{
				return false;
			}
		}

	private:
		vector<Data> _tables;
		size_t _n = 0; // 存储关键字个数
	};


	void TestHT1()
	{
		int a[] = { 20, 5, 8, 99999, 10, 30, 50 };
		//HashTable<int, int, DefaultHash<int>> ht;
		HashTable<int, int> ht;

		if (ht.Find(10))
		{
			cout << "找到了10" << endl;
		}

		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		// 测试扩容
		ht.Insert(make_pair(15, 15));
		ht.Insert(make_pair(5, 5));
		ht.Insert(make_pair(15, 15));

		if (ht.Find(50))
		{
			cout << "找到了50" << endl;
		}

		if (ht.Find(10))
		{
			cout << "找到了10" << endl;
		}

		ht.Erase(10);
		ht.Erase(10);

		if (ht.Find(50))
		{
			cout << "找到了50" << endl;
		}

		if (ht.Find(10))
		{
			cout << "找到了10" << endl;
		}
	}

	void TestHT2()
	{
		string arr[] = { "苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };

		/*string s1("苹果");
		string s2("果苹");
		string s3("果果");
		string s4("萍果");

		string s5("abcd");
		string s6("bcad");
		string s7("aadd");

		StringHash hf;
		cout << hf(s1) << endl;
		cout << hf(s2) << endl;
		cout << hf(s3) << endl;
		cout << hf(s4) << endl << endl;
		cout << hf(s5) << endl;
		cout << hf(s6) << endl;
		cout << hf(s7) << endl;*/


		//HashTable<string, int, StringHash> countHT;
		HashTable<string, int> countHT;

		for (auto& str : arr)
		{
			auto ret = countHT.Find(str);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				countHT.Insert(make_pair(str, 1));
			}
		}

		// 对应类型配一个仿函数，仿函数对象实现把key对象转换成映射的整数
		//HashTable<Date, int, DateHash> countHT;
		//HashTable<Student, int, StudentHash> countHT;


		HashTable<string, int> copy(countHT);
	}
}

namespace Bucket
{
	template<class K, class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next;

		HashNode(const pair<K, V>& kv)
			:_kv(kv)
			, _next(nullptr)
		{}
	};

	template<class K, class V>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		bool Insert(const pair<K, V>& kv)
		{
			if (Find(kv.first))
			{
				return false;
			}

			// 负载因子 == 1 扩容
			if (_tables.size() == _n)
			{
				// 扩容，有缺陷，可以再优化，大家下去可以思考一下
				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				HashTable<K, V> newHT;
				newHT._tables.resize(newSize, nullptr);

				for (size_t i = 0; i < _tables.size(); ++i)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						newHT.Insert(cur->_kv);
						cur = cur->_next;
					}
				}

				newHT._tables.swap(_tables);
			}

			size_t hashi = kv.first;
			hashi %= _tables.size();

			// 头插到对应的桶即可
			Node* newnode = new Node(kv);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;

			++_n;

			return true;
		}

		Node* Find(const K& key)
		{
			if (_tables.size() == 0)
			{
				return nullptr;
			}

			size_t hashi = key;
			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}

				cur = cur->_next;
			}

			return nullptr;
		}

	private:
		// 指针数组
		vector<Node*> _tables;
		size_t _n = 0;
	};

	void TestHT1()
	{
		int a[] = { 20, 5, 8, 99999, 10, 30, 50 };
		//HashTable<int, int, DefaultHash<int>> ht;
		HashTable<int, int> ht;

		if (ht.Find(10))
		{
			cout << "找到了10" << endl;
		}

		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		// 测试扩容
		ht.Insert(make_pair(15, 15));
		ht.Insert(make_pair(5, 5));
		ht.Insert(make_pair(15, 15));
		ht.Insert(make_pair(25, 15));
		ht.Insert(make_pair(35, 15));
		ht.Insert(make_pair(45, 15));
	}
}

阶段三：完善哈希桶

新增析构函数，优化版insert，删除Erase，拷贝构造,赋值(自己写)，Hashfunc(string也可以%)

析构函数

vector是自定义类型会去调用自己的析构函数析构数组，但不会释放每个位置上的链表的节点，我们要一个一个释放每个链表的每个节点

	    ~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}

				_tables[i] = nullptr;
			}
		}

优化insert

开一个新的扩容后大小的数组newTable，把原数组_table的节点一个一个头插到新数组中，这样就避免原方法中的双重消耗：新开节点(插入数组)，释放旧数组节点

bool Insert(const T& data)
		{
			HashFunc hf;
			KeyOfT kot;

			if (Find(kot(data)))
			{
				return false;
			}

			// 负载因子 == 1 扩容
			if (_tables.size() == _n)
			{
				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				vector<Node*> newTable;    //开一个新的扩容后大小的数组
				newTable.resize(newSize, nullptr); //开一个新的扩容后大小的数组
				for (size_t i = 0; i < _tables.size(); ++i)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;

						size_t hashi = hf(kot(cur->_data)) % newSize;
						cur->_next = newTable[hashi];
						newTable[hashi] = cur;

						cur = next;
					}

					_tables[i] = nullptr;
				}

				newTable.swap(_tables);
			}

			size_t hashi = hf(kot(data));
			hashi %= _tables.size();

			// 头插到对应的桶即可
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;

			++_n;

			return true;
		}

总代码：

#pragma once
#include<vector>

template<class K>
struct DefaultHash
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};


template<>
struct DefaultHash<string>
{
	size_t operator()(const string& key)
	{
		// BKDR
		size_t hash = 0;
		for (auto ch : key)
		{
			hash = hash * 131 + ch;
		}

		return hash;
	}
};


namespace Bucket
{
	template<class K, class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next;

		HashNode(const pair<K, V>& kv)
			:_kv(kv)
			, _next(nullptr)
		{}
	};

	template<class K, class V, class HashFunc = DefaultHash<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}

				_tables[i] = nullptr;
			}
		}

		bool Insert(const pair<K, V>& kv)
		{
			if (Find(kv.first))
			{
				return false;
			}

			HashFunc hf;

			// 负载因子 == 1 扩容
			if (_tables.size() == _n)
			{
				// 扩容，有缺陷，可以再优化，大家下去可以思考一下
				/*size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				HashTable<K, V> newHT;
				newHT._tables.resize(newSize, nullptr);

				for (size_t i = 0; i < _tables.size(); ++i)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						newHT.Insert(cur->_kv);
						cur = cur->_next;
					}
				}

				newHT._tables.swap(_tables);*/

				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				vector<Node*> newTable;
				newTable.resize(newSize, nullptr);
				for (size_t i = 0; i < _tables.size(); ++i)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;

						size_t hashi = hf(cur->_kv.first) % newSize;
						cur->_next = newTable[hashi];
						newTable[hashi] = cur;

						cur = next;
					}

					_tables[i] = nullptr;
				}

				newTable.swap(_tables);
			}

			size_t hashi = hf(kv.first);
			hashi %= _tables.size();

			// 头插到对应的桶即可
			Node* newnode = new Node(kv);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;

			++_n;

			return true;
		}

		Node* Find(const K& key)
		{
			if (_tables.size() == 0)
			{
				return nullptr;
			}

			HashFunc hf;
			size_t hashi = hf(key);
			//size_t hashi = HashFunc()(key);

			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}

				cur = cur->_next;
			}

			return nullptr;
		}

		bool Erase(const K& key)
		{
			if (_tables.size() == 0)
			{
				return false;
			}

			HashFunc hf;
			size_t hashi = hf(key);
			hashi %= _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					delete cur;
					
					return true;
				}

				prev = cur;
				cur = cur->_next;
			}

			return false;

			//size_t hashi = key;
			//hashi %= _tables.size();
			//Node* cur = _tables[hashi];
			//while (cur)
			//{
			//	if (cur->_kv.first == key)
			//	{
			//		if (cur->_next == nullptr)
			//		{
			//			cur->_kv = _tables[hashi]->_kv;
			//			Node* first = _tables[hashi];
			//			_tables[hashi] = first->_next;
			//			delete first;
			//		}
			//		else
			//		{
			//			//....
			//		}

			//		return true;
			//	}

			//	prev = cur;
			//	cur = cur->_next;
			//}

			//return false;
		}

	private:
		// 指针数组
		vector<Node*> _tables;
		size_t _n = 0;
	};

	void TestHT1()
	{
		int a[] = { 20, 5, 8, 99999, 10, 30, 50 };
		//HashTable<int, int, DefaultHash<int>> ht;
		HashTable<int, int> ht;

		if (ht.Find(10))
		{
			cout << "找到了10" << endl;
		}

		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		ht.Erase(20);
		ht.Erase(10);
		ht.Erase(30);
		ht.Erase(50);


		// 测试扩容
		ht.Insert(make_pair(15, 15));
		ht.Insert(make_pair(5, 5));
		ht.Insert(make_pair(15, 15));
		ht.Insert(make_pair(25, 15));
		ht.Insert(make_pair(35, 15));
		ht.Insert(make_pair(45, 15));
	}

	void TestHT2()
	{
		int a[] = { 20, 5, 8, 99999, 10, 30, 50 };
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		// 需要自己实现拷贝构造，完成链表桶深拷贝
		//HashTable<int, int> copy(ht);
	}

	void TestHT3()
	{
		string arr[] = { "苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };

		//HashTable<string, int, StringHash> countHT;
		HashTable<string, int> countHT;

		for (auto& str : arr)
		{
			auto ret = countHT.Find(str);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				countHT.Insert(make_pair(str, 1));
			}
		}
	}
}

阶段四：模拟实现unorderedmap/set

（1）template<class K, class T, class KeyOfT, class HashFunc> 第二个模板T

作用是让同一份哈希桶的代码能区分unordered_set和unordered_map，如果是unordered_set模板T就传入K，unordered_map模板T就传入pair

（2）class KeyOfT模板参数：

用于取出map中的pair中的K，取set中的key

        KeyOfT kot;

			if (Find(kot(data)))
			{
				return false;
			}

（3）HashFunc模板要放到 UnorderedSet.h / UnorderedMap.h 中

因为K可能是 Date 这些自定义类型，无法直接比较，要写个仿函数DateHash，test_set() 中定义对象时就传这个仿函数unordered_set<Date, DateHash> sd;

UnorderedSet.h

pragma once

#include "HashTable.h"

namespace bit
{
	// 21:06
	template<class K, class HashFunc = DefaultHash<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename Bucket::HashTable<K, K, SetKeyOfT, HashFunc>::iterator iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator find(const K& key)
		{
			return _ht.Find(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		Bucket::HashTable<K, K, SetKeyOfT, HashFunc> _ht;
	};

	struct Date
	{
		Date(int year = 1, int month = 1, int day = 1)
		:_year(year)
		, _month(month)
		, _day(day)
		{}

		bool operator==(const Date& d) const
		{
			return _year == d._year
				&& _month == d._month
				&& _day == d._day;
		}

		int _year;
		int _month;
		int _day;
	};

	struct DateHash
	{
		size_t operator()(const Date& d)
		{
			//return d._year + d._month + d._day;
			size_t hash = 0;
			hash += d._year;
			hash *= 131;
			hash += d._month;
			hash *= 1313;
			hash += d._day;

			//cout << hash << endl;

			return hash;
		}
	};

	void test_set()
	{
		unordered_set<int> s;
		//set<int> s;
		s.insert(2);
		s.insert(3);
		s.insert(1);
		s.insert(2);
		s.insert(5);
		s.insert(12);

		//unordered_set<int>::iterator it = s.begin();
		unordered_set<int>::iterator it;
		it = s.begin();

		//auto it = s.begin();
		while (it != s.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;

		
		for (auto e : s)
		{
			cout << e << " ";
		}
		cout << endl;

		unordered_set<Date, DateHash> sd;
		sd.insert(Date(2022, 3, 4));
		sd.insert(Date(2022, 4, 3));
	}
}

UnorderedMap.h

#pragma once

#include "HashTable.h"

namespace bit
{
	template<class K, class V, class HashFunc = DefaultHash<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc>::iterator iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);	
		}

		iterator find(const K& key)
		{
			return _ht.Find(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}

		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert(make_pair(key, V()));
			return ret.first->second;
		}

	private:
		Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> _ht;
	};

	void test_map()
	{
		unordered_map<string, string> dict;
		dict.insert(make_pair("sort", ""));
		dict.insert(make_pair("left", ""));
		dict.insert(make_pair("left", "ʣ"));
		dict["string"];
		dict["left"] = "ʣ";
		dict["string"] = "ַ";

		unordered_map<string, string>::iterator it = dict.begin();
		while (it != dict.end())
		{
			cout << it->first << " " << it->second << endl;
			++it;
		}

		cout << endl;

		for (auto& kv : dict)
		{
			cout << kv.first << " " << kv.second << endl;
		}
	}
}

HashTable.h

#pragma once
#include<vector>

template<class K>
struct DefaultHash
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};


template<>
struct DefaultHash<string>
{
	size_t operator()(const string& key)
	{
		// BKDR
		size_t hash = 0;
		for (auto ch : key)
		{
			hash = hash * 131 + ch;
		}

		return hash;
	}
};

namespace Bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable;

	template<class K, class T, class KeyOfT, class HashFunc>
	class __HTIterator
	{
		typedef HashNode<T> Node;
		typedef __HTIterator<K, T, KeyOfT, HashFunc> Self;
	public:
		Node* _node;
		HashTable<K, T, KeyOfT, HashFunc>* _pht;

		__HTIterator() {}    //默认构造函数和非默认构造函数重载

		__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
			:_node(node)
			, _pht(pht)
		{}

		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				KeyOfT kot;
				HashFunc hf;
				size_t hashi = hf(kot(_node->_data)) % _pht->_tables.size();
				++hashi;
				//找下一个不为空的桶
				for (; hashi < _pht->_tables.size(); ++hashi)
				{
					if (_pht->_tables[hashi])
					{
						_node = _pht->_tables[hashi];
						break;
					}
				}

				// 没有找到不为空的桶，用nullptr去做end标识
				if (hashi == _pht->_tables.size())
				{
					_node = nullptr;
				}
			}

			return *this;
		}

		T& operator*()
		{
			return _node->_data;
		}

		T* operator->()
		{
			return &_node->_data;
		}

		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}

		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}
	};

	// unordered_map ->HashTable<K, pair<K, V>, MapKeyOfT> _ht;
	// unordered_set ->HashTable<K, K, SetKeyOfT> _ht;
	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable
	{
		template<class K, class T, class KeyOfT, class HashFunc>
		friend class __HTIterator;

		typedef HashNode<T> Node;
	public:
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;

		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					return iterator(cur, this);
				}
			}

			return end();
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}

				_tables[i] = nullptr;
			}
		}

		size_t GetNextPrime(size_t prime)
		{
			const int PRIMECOUNT = 28;
			static const size_t primeList[PRIMECOUNT] =
			{
				53ul, 97ul, 193ul, 389ul, 769ul,
				1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
				49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
				1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
				50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
				1610612741ul, 3221225473ul, 4294967291ul
			};

			// 获取比prime大那一个素数
			size_t i = 0;
			for (; i < PRIMECOUNT; ++i)
			{
				if (primeList[i] > prime)
					return primeList[i];
			}

			return primeList[i];
		}

		pair<iterator, bool> Insert(const T& data)
		{
			HashFunc hf;
			KeyOfT kot;

			iterator pos = Find(kot(data));
			if (pos != end())
			{
				return make_pair(pos, false);
			}

			// 负载因子 == 1 扩容
			if (_tables.size() == _n)
			{
				//size_t newSize = _tables.size() == 0 ? 11 : _tables.size() * 2;
				size_t newSize = GetNextPrime(_tables.size());
				if (newSize != _tables.size())
				{
					vector<Node*> newTable;
					newTable.resize(newSize, nullptr);
					for (size_t i = 0; i < _tables.size(); ++i)
					{
						Node* cur = _tables[i];
						while (cur)
						{
							Node* next = cur->_next;

							size_t hashi = hf(kot(cur->_data)) % newSize;
							cur->_next = newTable[hashi];
							newTable[hashi] = cur;

							cur = next;
						}

						_tables[i] = nullptr;
					}

					newTable.swap(_tables);
				}
			}

			size_t hashi = hf(kot(data));
			hashi %= _tables.size();

			// 头插到对应的桶即可
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;

			++_n;

			return make_pair(iterator(newnode, this), false);;
		}

		iterator Find(const K& key)
		{
			if (_tables.size() == 0)
			{
				return iterator(nullptr, this);
			}

			KeyOfT kot;
			HashFunc hf;
			size_t hashi = hf(key);
			//size_t hashi = HashFunc()(key);

			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur, this);
				}

				cur = cur->_next;
			}

			return iterator(nullptr, this);
		}

		bool Erase(const K& key)
		{
			if (_tables.size() == 0)
			{
				return false;
			}

			HashFunc hf;
			KeyOfT kot;
			size_t hashi = hf(key);
			hashi %= _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					delete cur;
					
					return true;
				}

				prev = cur;
				cur = cur->_next;
			}

			return false;
		}

	private:
		// 指针数组
		vector<Node*> _tables;
		size_t _n = 0;
	};
}

Test.cpp

#include<iostream>
#include <set>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <time.h>
using namespace std;

#include "HashTable.h"
#include "UnorderedMap.h"
#include "UnorderedSet.h"

void test_set()
{
	unordered_set<int> s;
	//set<int> s;
	s.insert(2);
	s.insert(3);
	s.insert(1);
	s.insert(2);
	s.insert(5);

	//unordered_set<int>::iterator it = s.begin();
	auto it = s.begin();
	while (it != s.end())
	{
		cout << *it << " ";
		++it;
	}
	cout << endl;

	for (auto e : s)
	{
		cout << e << " ";
	}
	cout << endl;
}

void test_op()
{
	int n = 10000000;
	vector<int> v;
	v.reserve(n);
	srand(time(0));
	for (int i = 0; i < n; ++i)
	{
		//v.push_back(i);
		//v.push_back(rand()+i);  // 重复少
		v.push_back(rand());  // 重复多
	}

	size_t begin1 = clock();
	set<int> s;
	for (auto e : v)
	{
		s.insert(e);
	}
	size_t end1 = clock();

	size_t begin2 = clock();
	unordered_set<int> us;
	for (auto e : v)
	{
		us.insert(e);
	}
	size_t end2 = clock();

	cout << s.size() << endl;

	cout << "set insert:" << end1 - begin1 << endl;
	cout << "unordered_set insert:" << end2 - begin2 << endl;


	size_t begin3 = clock();
	for (auto e : v)
	{
		s.find(e);
	}
	size_t end3 = clock();

	size_t begin4 = clock();
	for (auto e : v)
	{
		us.find(e);
	}
	size_t end4 = clock();
	cout << "set find:" << end3 - begin3 << endl;
	cout << "unordered_set find:" << end4 - begin4 << endl;

	
	size_t begin5 = clock();
	for (auto e : v)
	{
		s.erase(e);
	}
	size_t end5 = clock();

	size_t begin6 = clock();
	for (auto e : v)
	{
		us.erase(e);
	}
	size_t end6 = clock();
	cout << "set erase:" << end5 - begin5 << endl;
	cout << "unordered_set erase:" << end6 - begin6 << endl;
}

void test_map()
{
	unordered_map<string, string> dict;
	dict.insert(make_pair("sort", "排序"));
	dict.insert(make_pair("left", "左边"));
	dict.insert(make_pair("left", "剩余"));
	dict["string"];
	dict["left"] = "剩余";
	dict["string"] = "字符串";
}

int main()
{
	bit::test_set();
	//test_op();
	bit::test_map();
	//Bucket::TestHT3();
	//TestHT2();

	return 0;
}