[C++ 系列] 83. unordered_map模拟实现

文章目录

1. 哈希表改造
2. unordered_map 模拟实现
3. 其它

1. 哈希表改造

针对上文中所模拟实现的开散列，进行以下两点改造：

模板参数列表的改造

// K:关键码类型
// V: 不同容器V的类型不同，如果是unordered_map，V代表一个键值对，如果是unordered_set,V 为 K
// KeyOfValue: 因为V的类型不同，通过value取key的方式就不同，详细见unordered_map/set的实现
// HF: 哈希函数仿函数对象类型，哈希函数使用除留余数法，需要将Key转换为整形数字才能取模
template<class K, class V, class KeyOfValue, class HF = DefHashF<T> >
class HashBucket;

增加迭代器操作

这个是重点，对于容器的模拟实现，很重要的一步就是实现其迭代器。主要就是重载 ++a、a++、!=、==、begin()、end() 等等，以及插入、删除、查找的返回值需要改造为相应的迭代器等操作。

以下代码多处已经过注释，已经过测试，未发现大漏洞，基本满足实现要求。若有疑问的话希望能看看上篇文章的思路，讲解的很清楚，或是在下留言即可。

参见代码如下：

// HashBucket.h

#pragma once
#include <vector>
using namespace std;

template<class T>
class HashBucketNode
{
	T m_val;
	HashBucketNode<T> * m_next;

	HashBucketNode(const T & val = T()) :
		m_val(val),
		m_next(nullptr)
	{}

	template<class K, class V, class KeyofValue, class HF>
	friend class HashBucket;
};

class dealInt
{
public:
	int operator()(int n)
	{
		return n;
	}
};

template<class K, class V, class KeyofValue, class HF = dealInt>
class HashBucket
{
	vector<HashBucketNode<V> *> m_data;
	size_t m_size;

	static long long s_m_primeTable[30];
	int m_primePos;
public:
	HashBucket(size_t capacity = s_m_primeTable[0]) :
		m_data(capacity, nullptr),
		m_size(0),
		m_primePos(0)
	{}

	// 内部类写iterator
	// 内部类不需要写template<class K, class V, class KeyofValue, class HF = dealInt>
	// 会继承外部类的一切
	class iterator
	{
	public:
		// 将开散列看作二维数组结构的话，我们需要使用迭代器确定横纵坐标
		// 1.即vector内的桶的位置信息需要确定，作为横坐标这个迭代器，
		// 主要方便++操作，若对链表最后一个元素++，则找下一个不为空的vector元素位置即可
		// 2.在该桶中即链表中的元素位置信息需要确定，作为纵坐标
		HashBucket<K, V, KeyofValue, HF> * m_hb;
		HashBucketNode<V> * m_node;

		// 初始化一个桶，才能够进行使用
		iterator(HashBucketNode<V> * node = nullptr,
				 HashBucket<K, V, KeyofValue, HF> * hbpos = nullptr): 
			m_node(node),
	     	m_hb(hbpos)
		{}
		
		iterator(const iterator & it) :
			m_node(it.m_node),
			m_hb(it.m_hb)
		{}
		
		// 迭代器
		V& operator*()
		{
			return m_node->m_val;
		}

		V* operator->()
		{
			return &m_node->m_val;
		}
		
		// 前置++
		iterator operator++()
		{
			// 提前保存m_val，若m_node为最后一个数据
			// 则++后出现空值的情况
			int val = m_node->m_val;
			m_node = m_node->m_next;
			// 若m_node为空
			if (!m_node)
			{
				// 得到下一个桶的位置
				// 如果node为最后一个桶的最后一个元素
				// +1后不会进入for循环，返回空即可，这样做是合理的
				// 否则针对最后一个桶中的最后一个元素进行++的情况，极易返回什么都不做的m_node
				int bucketno = m_hb->hashFunc(KeyofValue()(val)) + 1;
				for (; bucketno < m_hb->capacity(); bucketno++)
				{
					// 如果当前桶的位置不为空
					if (m_hb->m_data[bucketno])
					{
						m_node = m_hb->m_data[bucketno];
						break;
					}
				}
			}

			return *this;
		}

		// 后置++
		iterator operator++(int)
		{
			// 调用拷贝构造，保存副本
			iterator<K, V, KeyofValue, HF> tmp = *this;
			// 调用前置++即可
			++(*this);
			// 返回副本即可
			return tmp;
		}
		
		// == != 均为双目运算符，第一个参数为this指针
		// 仅需传入第二个参数即可，并且加两个const，不希望两者改变
		bool operator==(const iterator & data) const
		{
			return m_node == data.m_node && m_hb == data.m_hb;
		}

		bool operator!=(const iterator & data) const
		{
			return m_node != data.m_node || m_hb != data.m_hb;
		}
	};


private:
	int hashFunc(const K & key)
	{
		HF func;
		return func(key) % capacity();
	}

	void checkCapacity()
	{
		if (m_size == capacity())
		{
			int mcapa = capacity();
			vector<HashBucketNode<V> *> tmp(s_m_primeTable[++m_primePos], nullptr);
			m_data.swap(tmp);
			m_size = 0;

			int i;
			HashBucketNode<V> * cur;
			for (i = 0; i < mcapa; i++)
			{
				for (cur = tmp[i]; cur; cur = cur->m_next)
				{
					insert(cur->m_val);
				}
			}
		}
	}

public:
	// 返回第一个迭代器
	iterator begin()
	{
		// 从vector第一个位置开始找即可
		int bucketno = 0;
		for (; bucketno < capacity(); bucketno++)
		{
			if (m_data[bucketno])
			{
				return iterator(m_data[bucketno], this);
			}
		}

		return iterator(nullptr, this);
	}

	iterator end()
	{
		return iterator(nullptr, this);
	}


	iterator insert(const V & val)
	{
		checkCapacity();

		int hashnum = hashFunc(KeyofValue()(val));
		HashBucketNode<V> * tmp;

		if (m_data[hashnum])
		{
			for (tmp = m_data[hashnum]; tmp; tmp = tmp->m_next)
			{
				if (tmp->m_val == val)
				{
					// 代表插入失败
					return end();
				}
			}
		}

		tmp = new HashBucketNode<V>(val);

		tmp->m_next = m_data[hashnum];
		m_data[hashnum] = tmp;

		m_size++;
		// 返回插入迭代器，当前位置，桶的位置
		return iterator(m_data[hashnum], this);
	}

	iterator erase(const V & val)
	{
		int hashnum = hashFunc(KeyofValue()(val));
		HashBucketNode<V> * tmp;

		if (!m_data[hashnum])
		{
			// 当前桶不存在，删除失败，返回end()即可
			return end();
		}
		
		// 当前桶存在，查看是否为头删
		if (m_data[hashnum]->m_val == val)
		{
			// 保存头结点的副本res，++res找到待返回的下一个节点
			iterator res(m_data[hashnum], this);
			++res;

			// 进行头删
			tmp = m_data[hashnum];
			m_data[hashnum] = tmp->m_next;
			delete tmp;

			// 返回下一个节点即可
			m_size--;
			return res;
		}
		
		// 执行后删，道理如同头删操作
		else
		{
			for (tmp = m_data[hashnum]; tmp->m_next; tmp = tmp->m_next)
			{
				if (tmp->m_next->m_val == val)
				{
					iterator res(tmp->m_next, this);
					++res;

					HashBucketNode<V> * cur;
					cur = tmp->m_next;
					tmp->m_next = cur->m_next;
					delete cur;

					m_size--;
					return res;
				}
			}
			return end();
		}
	}

	iterator find(const V & val)
	{
		int hashnum = hashFunc(KeyofValue()(val));

		HashBucketNode<V> * cur;
		for (cur = m_data[hashnum]; cur; cur = cur->m_next)
		{
			if (cur->m_val == val)
			{
				return iterator(cur, this);
			}
		}
		return iterator(nullptr, this);
	}

	void clear()
	{
		HashBucketNode<V> * tmp;
		for (auto & head : m_data)
		{
			while (head)
			{
				tmp = head;
				head = head->m_next;
				delete tmp;
			}
		}
		m_size = 0;
	}

	size_t capacity()
	{
		return s_m_primeTable[m_primePos];
	}
};

template<class K, class V, class KeyofValue, class HF>
long long HashBucket<K, V, KeyofValue, HF>::s_m_primeTable[30] = {
	11, 23, 47, 89, 179,
	353, 709, 1409, 2819, 5639,
	11273, 22531, 45061, 90121, 180233,
	360457, 720899, 1441807, 2883593, 5767169,
	11534351, 23068673, 46137359, 92274737, 184549429,
	369098771, 738197549, 1476395029, 2952790016u, 4294967291u
};

2. unordered_map 模拟实现

有了上面 "HashBucket.h"，仅需简单改造即可

unordered_map 中存储的是 pair<K, V> 的键值对，K 为 key 的类型， V 为 value 的类型，HF 哈希函数类型 unordered_map 在实现时，只需将 hashbucket 中的接口重新封装即可。

尤其注意下 unordered_map 各类函数的返回值可能与想象的有所不同，例如其 insert 会返回 pair<iterator, bool>，这个问题得进行修改，在 "HashBucket.h" 需要将 insert 函数进行改造。

参见代码如下：

pair<iterator, bool> insert(const V & val)
{
	checkCapacity();

	int hashnum = hashFunc(KeyofValue()(val));
	HashBucketNode<V> * tmp;

	if (m_data[hashnum])
	{
		for (tmp = m_data[hashnum]; tmp; tmp = tmp->m_next)
		{
			if (tmp->m_val == val)
			{
				pair<iterator, bool> pairtmp;
				pairtmp.first = end();
				pairtmp.second = false;
				return pairtmp;
			}
		}
	}

	tmp = new HashBucketNode<V>(val);

	tmp->m_next = m_data[hashnum];
	m_data[hashnum] = tmp;

	m_size++;

	pair<iterator, bool> pairtmp;
	iterator it(m_data[hashnum], this);
	pairtmp.first = it;
	pairtmp.second = true;
	return pairtmp;
}

count 函数判断某一个 key 值是否在哈希表里。在 "HashBucket.h" 需要将 count 函数进行添加。

参见代码如下：

size_t count(const K & kv)
	{
		int bucketno = hashFunc(kv);
		HashBucketNode<V> * cur;
		for (cur = m_data[bucketno]; cur; cur = cur->m_next)
		{
			if (KeyofValue()(cur->m_val) == kv)
			{
				return 1;
			}
		}
		return 0;
	}

bucketCount 返回桶的个数。在 "HashBucket.h" 需要将 bucketCount 函数进行添加。

size_t bucketCount()
{
	return capacity();
}

bucketSize 返回某个桶中的元素个数。在 "HashBucket.h" 需要将 bucketSize 函数进行添加。

size_t bucketSize(size_t bucketno)
{
	HashBucketNode<V> * cur;
	int count = 0;
	for (cur = m_data[bucketno]; cur; cur = cur->next)
	{
		count++;
	}
	return count;
}

operator[ ] 是 map、unordered_map 的一大特色，在此一定要理解其作用。有些代码用中括号来查找 map，其实在 map 中，中括号已经被重载为有不存在即插入的功能。于是如果查找的 key 不存在，则 map 中会增加很多垃圾数据。

参见代码如下：

// 写法1 
// 传进K返回V,[ ]传入插入K值，返回对应的V值
// 看到有些代码，用中括号来查找map, 
// 其实在map中，中括号已经被重载为有不存在即插入的功能。
// 于是如果查找的key不存在，则map中会增加很多垃圾数据。
V& operator[](const K & key)
{
	// pair<K, V>(key, V()) 中V()为任意值，最后需要将其返回，传入V的构造函数最为保险
	pair<iterator, bool> ptmp = m_hb.insert(pair<K, V>(key, V()));
	iterator itmp = ptmp.first;
	// return itmp->second;
	return (*itmp).second;
}

// 写法2
// 一行简洁流 不建议，可读性太差
// 在方法一中并不一定会创建临时变量，编译器会进行优化
const V& operator[](const K & key) const
{
	return (*(m_hb.insert(pair<K, V>(key, V()))).first).second;
}

修改后的 "HashBucket.h"

参见代码如下：

#pragma once
#include <vector>
using namespace std;

template<class T>
class HashBucketNode
{
	T m_val;
	HashBucketNode<T> * m_next;

	HashBucketNode(const T & val = T()) :
		m_val(val),
		m_next(nullptr)
	{}

	template<class K, class V, class KeyofValue, class HF>
	friend class HashBucket;
};

class dealInt
{
public:
	int operator()(int n)
	{
		return n;
	}
};

template<class K, class V, class KeyofValue, class HF = dealInt>
class HashBucket
{
	vector<HashBucketNode<V> *> m_data;
	size_t m_size;

	static long long s_m_primeTable[30];
	int m_primePos;
public:
	HashBucket(size_t capacity = s_m_primeTable[0]) :
		m_data(capacity, nullptr),
		m_size(0),
		m_primePos(0)
	{}

	~HashBucket()
	{
		clear();
	}

	class iterator
	{
	public:
		HashBucket<K, V, KeyofValue, HF> * m_hb;
		HashBucketNode<V> * m_node;

		iterator(HashBucketNode<V> * node = nullptr,
				 HashBucket<K, V, KeyofValue, HF> * hbpos = nullptr) :
				 m_node(node),
				 m_hb(hbpos)
		{}

		iterator(const iterator & it) :
			m_node(it.m_node),
			m_hb(it.m_hb)
		{}

		V& operator*()
		{
			return m_node->m_val;
		}

		V* operator->()
		{
			return &m_node->m_val;
		}

		iterator operator++()
		{
			V val = m_node->m_val;
			m_node = m_node->m_next;
			if (!m_node)
			{
				int bucketno = m_hb->hashFunc(KeyofValue()(val)) + 1;
				for (; bucketno < m_hb->capacity(); bucketno++)
				{
					if (m_hb->m_data[bucketno])
					{
						m_node = m_hb->m_data[bucketno];
						break;
					}
				}
			}

			return *this;
		}

		iterator operator++(int)
		{
			iterator<K, V, KeyofValue, HF> tmp = *this;
			++(*this);
			return tmp;
		}

		bool operator==(const iterator & data) const
		{
			return m_node == data.m_node && m_hb == data.m_hb;
		}

		bool operator!=(const iterator & data) const
		{
			return m_node != data.m_node || m_hb != data.m_hb;
		}
	};


private:
	int hashFunc(const K & key)
	{
		HF func;
		return func(key) % capacity();
	}

	void checkCapacity()
	{
		if (m_size == capacity())
		{
			int mcapa = capacity();
			vector<HashBucketNode<V> *> tmp(s_m_primeTable[++m_primePos], nullptr);
			m_data.swap(tmp);
			m_size = 0;

			int i;
			HashBucketNode<V> * cur;
			for (i = 0; i < mcapa; i++)
			{
				for (cur = tmp[i]; cur; cur = cur->m_next)
				{
					insert(cur->m_val);
				}
			}
		}
	}

public:
	iterator begin()
	{
		int bucketno = 0;
		for (; bucketno < capacity(); bucketno++)
		{
			if (m_data[bucketno])
			{
				return iterator(m_data[bucketno], this);
			}
		}

		return iterator(nullptr, this);
	}

	iterator end()
	{
		return iterator(nullptr, this);
	}


	pair<iterator, bool> insert(const V & val)
	{
		checkCapacity();

		int hashnum = hashFunc(KeyofValue()(val));
		HashBucketNode<V> * tmp;

		if (m_data[hashnum])
		{
			for (tmp = m_data[hashnum]; tmp; tmp = tmp->m_next)
			{
				if (tmp->m_val == val)
				{
					pair<iterator, bool> pairtmp;
					pairtmp.first = end();
					pairtmp.second = false;
					return pairtmp;
				}
			}
		}

		tmp = new HashBucketNode<V>(val);

		tmp->m_next = m_data[hashnum];
		m_data[hashnum] = tmp;

		m_size++;

		pair<iterator, bool> pairtmp;
		iterator it(m_data[hashnum], this);
		pairtmp.first = it;
		pairtmp.second = true;
		return pairtmp;
	}

	iterator erase(const V & val)
	{
		int hashnum = hashFunc(KeyofValue()(val));
		HashBucketNode<V> * tmp;

		if (!m_data[hashnum])
		{
			return end();
		}

		if (m_data[hashnum]->m_val == val)
		{
			iterator res(m_data[hashnum], this);
			++res;

			tmp = m_data[hashnum];
			m_data[hashnum] = tmp->m_next;
			delete tmp;

			m_size--;
			return res;
		}
		else
		{
			for (tmp = m_data[hashnum]; tmp->m_next; tmp = tmp->m_next)
			{
				if (tmp->m_next->m_val == val)
				{
					iterator res(tmp->m_next, this);
					++res;

					HashBucketNode<V> * cur;
					cur = tmp->m_next;
					tmp->m_next = cur->m_next;
					delete cur;

					m_size--;
					return res;
				}
			}
			return end();
		}
	}

	iterator find(const V & val)
	{
		int hashnum = hashFunc(KeyofValue()(val));

		HashBucketNode<V> * cur;
		for (cur = m_data[hashnum]; cur; cur = cur->m_next)
		{
			if (cur->m_val == val)
			{
				return iterator(cur, this);
			}
		}
		return iterator(nullptr, this);
	}

	void clear()
	{
		HashBucketNode<V> * tmp;
		for (auto & head : m_data)
		{
			while (head)
			{
				tmp = head;
				head = head->m_next;
				delete tmp;
			}
		}
		m_size = 0;
	}

	size_t capacity()
	{
		return s_m_primeTable[m_primePos];
	}

	size_t size()
	{
		return m_size;
	}

	bool empty()
	{
		return m_size == 0;
	}

	size_t count(const K & kv)
	{
		int bucketno = hashFunc(kv);
		HashBucketNode<V> * cur;
		for (cur = m_data[bucketno]; cur; cur = cur->m_next)
		{
			if (KeyofValue()(cur->m_val) == kv)
			{
				return 1;
			}
		}
		return 0;
	}

	size_t bucketCount()
	{
		return capacity();
	}

	size_t bucketSize(size_t bucketno)
	{
		HashBucketNode<V> * cur;
		int count = 0;
		for (cur = m_data[bucketno]; cur; cur = cur->next)
		{
			count++;
		}
		return count;
	}

	/*1、Count 判断某一个Key值是否在哈希表里
	  2、BucketCount 返回桶的个数
	  3、BucketSize 返回某个桶中的元素个数*/
};

template<class K, class V, class KeyofValue, class HF>
long long HashBucket<K, V, KeyofValue, HF>::s_m_primeTable[30] = {
	11, 23, 47, 89, 179,
	353, 709, 1409, 2819, 5639,
	11273, 22531, 45061, 90121, 180233,
	360457, 720899, 1441807, 2883593, 5767169,
	11534351, 23068673, 46137359, 92274737, 184549429,
	369098771, 738197549, 1476395029, 2952790016u, 4294967291u
};

调用 "HashBucket.h" 实现 unordered_map.h。
参见代码如下：

// unordered_map.h

#include "HashBucket.h"

template <class K, class V, class HF = dealInt>
class unordered_map
{
	class KeyofValue
	{
	public:
		const K& operator()(const pair<K, V> &data)
		{
			return data.first;
		}
	};

	HashBucket<K, pair<K, V>, KeyofValue, HF> m_hb;

public:
	typename typedef HashBucket<K, pair<K, V>, KeyofValue, HF>::iterator iterator;

	unordered_map():
		m_hb()
	{}

	~unordered_map()
	{
		m_hb.~HashBucket();
	}

	iterator begin()
	{
		return m_hb.begin();
	}

	iterator end()
	{
		return m_hb.end();
	}

	iterator size()
	{
		return m_hb.size();
	}

	iterator find(const V & val)
	{
		return m_hb.find(val);
	}

	size_t count(const K & key)
	{
		return m_hb.count(key); //透传
	}

	void clear()
	{
		return m_hb.clear();
	}

	bool empty()
	{
		return m_hb.empty();
	}

	pair<iterator, bool> insert(const pair<K, V> val)
	{
		return m_hb.insert(val);
	}

	V& operator[](const K & key)
	{
		pair<iterator, bool> ptmp = m_hb.insert(pair<K, V>(key, V()));
		iterator itmp = ptmp.first;
		return (*itmp).second;
	}

	const V& operator[](const K & key) const
	{
		return (*(m_hb.insert(pair<K, V>(key, V()))).first).second;
	}
};

3. 其它

关于 unordered 系列的容器，大多都可以经过修改 "HashBucket.h" 模拟实现，在此不再赘述了。

Y_puyu

发布了359 篇原创文章 · 获赞 259 · 访问量 7万+

私信关注