c++哈希表-map&set (无序)

简述

无序的map、set，称为unordered_map、unordered_set。采用迭代器遍历出来的元素是无序的，这是因为底层实现的数据结构为哈希表。

1、哈希表不同于红黑树，哈希表的平均查找效率是O(1)，即常数级的效率。
虽然红黑树的查找效率是O(logn)，已经很高，但仍不及哈希表。
2、哈希表遍历的元素是无序的，红黑树有序的。这也决定他们实现的容器是何性质。

关于哈希表：

不经过任何比较，一次直接从表中得到要搜索的元素。如果构造一种存储结构，通过某种函数(哈希函数)使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。

哈希函数：1、直接定址法；2、除留余数法；3、平方取中法等
下面采用方法2实现。
-插入
假设容量为10：插入大于容量大小的元素14，通过哈希函数运算（14 % 10）后位置（下标）为4，但是位置4已经存放了元素。那这个元素放到哪里呢？这时就发生了哈希冲突。

解决哈希冲突：

1）、闭散列（线性探测）

解决方法：如果插入的元素发生冲突，代表它这个位置已经被占用，我们使用线性探测找下一个空位置，然后插入该元素，解决哈希冲突。

我们以下面哈希冲突元素44为例：
在这里插入图片描述
为了减少哈希冲突的元素，也为了防止插入的元素个数超过容量，我们定义一个负载因子 = 元素个数 / 容量大小，一般控制在0.7左右。

每次插入的时候，判断负载因子大小，如果大于0.7，就需要扩容，并重新分配元素位置。

定义它有两个好处：
1、控制容量范围， 不会超出
2、当扩容后，元素重新根据哈希函数分配位置，减少哈希冲突。
举个例子： 之前容量为10， 插入元素4、14都是同一位置，有冲突。
扩容后容量为20，4、14就不在同一位置：一个在位置4，一个在位置14，无冲突。

闭散列（线性探测）的缺点
发生哈希冲突的元素会占用其他元素的位置，导致其他元素无法插入到正确的位置，也变为哈希冲突元素。

情况严重的时候，哈希表会退化成一个顺序表，查找效率退化为O(n)。

线性探测代码

#include<iostream>
#include<vector>

using namespace std;

// Occupancy state of a single slot in the linear-probing table.
enum State {
	EXIST = 0,		// slot currently holds a live element
	DELETE = 1,		// slot held an element that was erased (tombstone)
	EMPTY = 2		// slot holds nothing
};

// One slot of the linear-probing hash table: the stored key/value pair
// together with the slot's occupancy state.
template<class K, class V>
struct Node {
	std::pair<K, V> _value;		// the stored element (key, value)
	// Default to EMPTY so a freshly created slot is never indeterminate and is
	// never mistaken for an occupied one (EXIST is the zero enumerator).
	State _state = EMPTY;
};

template<class K, class V>
class HashTable {
public:

	typedef Node<K, V> Node;

	HashTable(size_t N = 10) {
		
		//开辟的数组大小
		_table.resize(N);
		//状态滞空
		for (int i = 0; i < N; ++i) {
			_table[i]._state = EMPTY;
		}
		//存放的元素个数
		_size = 0;
	}

	//插入接口
	bool insert(const pair<K, V>& value) {

		//检查容量
		chenkCapacity();
		//得出插入位置
		int index = value.first % _table.size();
		while (_table[index]._state == EXIST) {		//如果这个位置有元素， 就往下遍历

			//插入元素有重复， 直接退出										
			if (_table[index]._value.first == value.first)
				return false;

			++index;

			//如果走到尾的话， 就需要重新开始
			if (index == _table.size()) {
				index = 0;
			}
		}
		_table[index]._value = value;
		_table[index]._state = EXIST;
		++_size;
		return true;
	}

	//检查是否超过负载因子
	void chenkCapacity() {
		//负载因子 = 插入的元素个数 / 数组大小 --> _size / _table.size()

		//标准负载因子规范为 0.7
		if (_size * 10 / _table.size() >= 7) {	//整形/的话， 不会存在小数
			//超过负载因子， 需要扩容

			size_t newsize = _table.size() * 2;
			//创建新表， 旧表中元素重复计算他们的在新表的位置进行插入
			HashTable<K, V>* newHt = new HashTable<K, V>(newsize);
			for (int i = 0; i < _table.size(); ++i) {
				if (_table[i]._state == EXIST) {
					//table中存在的元素，才会进行操作添加到新表中去
					//并且，在调用新表中的inset接口， 会保存在正确的位置
					newHt->insert(_table[i]._value);
				}
			}

			//交换 - 调用vactor中swap接口
			_table.swap(newHt->_table);
		}

	}
	
	//查找接口
	Node* find(const K& key) {

		//找到查找位置
		int index = key % _table.size();
		while (_table[index]._state != EMPTY) {
			//如果插入的位置存在元素的话， 就往后找
			if (_table[index]._state == EXIST && _table[index]._value.first == key) {
				//找到插入的元素
				return &(_table[index]);
			}
			
			++index;

			//判断index是否越界
			if (index == _table.size()) {
				index = 0;
			}
		}
		return nullptr;
	}

	//删除
	bool erase(const K& key){

		Node* node = find(key);
		if (node) {
			//这个的删除是假删除 - 不会真删除， 不然会影响后续数据的查找
			//将要删除的结点状态置为DELETE， 只要不是EMPTY就行
			node->_state = DELETE;
			--_size;
			return true;
		}
		return false;
	}
	
	//遍历
	void HashTable_taervse() {

		for (int i = 0; i < _table.size(); ++i) {
			if (_table[i]._state == EXIST) {
				cout << _table[i]._value.first  << "--->" << _table[i]._value.second << endl;
			}
		}
	}

private:
	vector<Node> _table;
	size_t _size;			//存放元素的个数
};

void testHashTable() {

	HashTable<int, int> ht(2);

	ht.insert(make_pair(1, 1));
	ht.insert(make_pair(2, 1));
	ht.insert(make_pair(17, 1));
	ht.insert(make_pair(15, 1));
	ht.insert(make_pair(12, 1));
	ht.insert(make_pair(99, 1));
	ht.insert(make_pair(4, 1));
	ht.insert(make_pair(5, 1));

	ht.HashTable_taervse();

	cout << endl;
	cout << ht.erase(1) << endl;
	cout << ht.erase(17) << endl;
	cout << ht.erase(99) << endl;

	ht.HashTable_taervse();
}

// Entry point of the linear-probing example: runs the demo and exits with status 0.
int main()
{
	testHashTable();
	return 0;
}

2）、开散列（哈希桶）

a、解决方法：数组里面存放的不再是元素本身，而是一个链表。发生冲突的元素放到链表的后面，解决冲突。

如下图：
在这里插入图片描述
b、哈希桶也引入负载因子，一般设置为1，其作用和原理与线性探测中的负载因子相同。

c、哈希桶如果链表长度过长（也就是冲突过多），会导致查找效率退化，和单链表一样！

如果预计哈希桶中冲突的元素会很多，可以把桶里的链表换成红黑树来存放元素。
红黑树的查找效率为O(logn)，优于单链表的O(n)。
这样一来，查找冲突元素时就到对应桶的红黑树中去找，如果恰好在树根找到则更快。
最差查找效率：O(logn)        最优效率：O(1)

对比两种处理哈希冲突的方式，哈希桶更为优秀，确实如此，在STL关联式容器
unordered_map、unordered_set等底层的数据结构都是采用哈希桶实现的。

简单使用哈希桶实现无序map、set代码

#include<iostream>
#include<vector>
#include<utility>
#include<string>

using namespace std;

// Node of the singly linked list that forms one hash bucket.
template<class V>
struct HashNode {

	HashNode<V>* _next;		// next node in the same bucket, nullptr at the end
	V _data;				// the stored element

	// Creates an unlinked node holding a copy of `data`.
	// The members are set in the initializer list (in declaration order) so
	// that V only needs to be copy-constructible, not default-constructible
	// or assignable (e.g. a pair with a const key).
	HashNode(const V& data)
		: _next(nullptr)
		, _data(data)
	{
	}
};

// Hash functor, primary template: for keys that convert implicitly to int the
// key itself is used as the hash value.
template<class K>
struct HashFun {
	// Returns `key` converted to int. Declared const so the functor can also
	// be invoked through a const object or reference.
	int operator()(const K& key) const {
		return key;
	}
};

// Redeclaration of the primary template, so that the specialization below is
// self-contained.
template<class K>
struct HashFun;

// Full specialization of the hash functor for std::string keys.
template<>
struct HashFun<std::string> {
	// Folds the characters into a polynomial hash with multiplier 131.
	// The accumulation is done in unsigned arithmetic, where wrap-around is
	// well defined (signed overflow on long strings would be undefined
	// behaviour), and each character is read as unsigned char so that bytes
	// above 0x7F contribute a non-negative value.
	// Returns a non-negative int.
	int operator()(const std::string& key) const {

		unsigned int hash = 0;
		for (const auto& ch : key) {
			hash = hash * 131u + static_cast<unsigned char>(ch);
		}

		// Drop the top bit so the value always fits into a non-negative int.
		const unsigned int nonNegativeMask = ~0u >> 1;
		return static_cast<int>(hash & nonNegativeMask);
	}
};


// Forward declaration of the bucket table: the iterator needs the table it
// belongs to in order to move from one bucket to the next.
template<class K, class V, class KeyOfValue, class HFun>
class HashBucket;

// Forward iterator over the elements of a HashBucket.
// It visits the nodes of one bucket list, then continues with the first node
// of the next non-empty bucket. A null node pointer represents end().
template<class K, class V, class KeyOfValue, class HFun>
struct Hiterator {

	// The table type this iterator walks.
	typedef HashBucket<K, V, KeyOfValue, HFun> htable;
	// Node type of the bucket lists.
	typedef HashNode<V> Node;
	// Pointer to a node.
	typedef Node* pNode;
	// This iterator type itself.
	typedef Hiterator<K, V, KeyOfValue, HFun> Self;

	pNode _node;	// current node; nullptr means end()
	htable* _ht;	// owning table, used to locate the following buckets

	// Creates an iterator positioned at `node` inside table `ht`.
	Hiterator(const pNode& node, htable* ht)
		:_node(node)
		,_ht(ht)
	{

	}

	// Returns a reference to the current element.
	V& operator*() const {
		return _node->_data;
	}

	// Returns a pointer to the current element.
	V* operator->() const {
		return &(_node->_data);
	}

	// Two iterators are equal when they point at the same node.
	bool operator==(const Self& it) const {
		return _node == it._node;
	}

	bool operator!=(const Self& it) const {
		return _node != it._node;
	}

	// Pre-increment: advances to the next element and returns *this.
	Self& operator++() {

		// Incrementing end() is a no-op instead of a null dereference.
		if (_node == nullptr) {
			return *this;
		}

		// Still inside the current bucket list.
		if (_node->_next) {
			_node = _node->_next;
			return *this;
		}

		// End of this bucket: recompute which bucket the current element
		// lives in, then scan the following buckets for the first non-empty
		// one. If none is found the iterator becomes end().
		KeyOfValue kv;
		HFun hf;
		const size_t buckets = _ht->_table.size();
		size_t index = hf(kv(_node->_data)) % buckets;

		_node = nullptr;
		for (++index; index < buckets; ++index) {
			if (_ht->_table[index] != nullptr) {
				_node = _ht->_table[index];
				break;
			}
		}
		return *this;
	}

	// Post-increment: advances and returns the iterator's previous position.
	Self operator++(int) {
		Self previous(*this);
		++(*this);
		return previous;
	}

};



//哈希桶类
template<class K, class V, class KeyOfValue, class HFun>
class HashBucket {

public:

	//声明迭代器类为友元类 - 这样迭代器类中就可以使用到哈希桶的私有属性vector， 
	//从而能够使用哈希桶来寻找结点位置。
	template<class K, class V, class KeyOfValue, class HFun>
	friend struct Hiterator;

	//重定义结点
	typedef HashNode<V> Node;
	//重定义结点指针
	typedef Node* pNode;
	//重定义迭代器类
	typedef Hiterator<K, V, KeyOfValue, HFun> iterator;

	//迭代器 - begin
	iterator begin() {
		/*
			begin对应哈希桶头一个带头链表首结点
		*/
		int index = 0;
		while (_table[index] == nullptr) {
			
			if (index == _table.size() - 1) {
				return iterator(_table[index], this);
			}
			++index;
		}

		return iterator(_table[index], this);
	}
	
	//迭代器 - end
	iterator end() {
		/*
			end对应的是哈希桶的最后一个头链表的尾节点 - nullptr
			因此， 这块随便用一个nullptr就行。
		*/
		int index = 0;
		while (_table[index] != nullptr) {
			++index;
		}
		return iterator(_table[index], this);
	}

	//插入接口 - 为了模拟map、set接口， V目前用map实现， 为pair类
	pair<iterator, bool> insert(const V& data) {	
		
		//检查容量
		checkCapacity();
		
		//创建仿函数对象 - 来获取准确的V值
		KeyOfValue kov;
		HFun hf;

		//获取插入位置
		int index = hf(kov(data)) % _table.size();

		pNode cur = _table[index];
		while (cur) {
			
			if (kov(cur->_data) == kov(data)) {
				//有重复元素
				return make_pair(iterator(cur, this), false);
			}
			cur = cur->_next;
		}

		//头插法
		cur = new Node(data);
		cur->_next = _table[index];
		_table[index] = cur; 

		++_size;
		return make_pair(iterator(cur, this), true);
	}

	//判断容器是否合理， 不合理需要重新构建
	void checkCapacity() {

		//负载因子 可以为 1
		if (_size == _table.size()) {
			//超过负载因子， 需要扩容

			size_t newsize = (_table.size() == 0) ? 10 : 2 * _table.size();
			//创建新表
			vector<pNode> newhashB;
			newhashB.resize(newsize);

	
			KeyOfValue kv;
			HFun hf;

			for (int i = 0; i < _table.size(); ++i) {
				pNode cur = _table[i];
				while (cur) {
					//计算新的插入位置
					int index = hf(kv(cur->_data)) % newhashB.size();
					//保存next	
					pNode next = cur->_next;
					//头插
					cur->_next = newhashB[i];
					newhashB[i] = cur;
					
					cur = next;
				}
				//旧表当前的位置元素置为空
				_table[i] = nullptr;
			}
			//交换表
			_table.swap(newhashB);			
		}
	}
		
private:
	//指针数组
	vector<pNode> _table;
	size_t _size;
};




//模拟实现map
template<class K, class V, class HFun = HashFun<K>>
class UnorderedMap {
	
	//定义仿函数
	struct MapKeyOfValue {
		const K& operator()(const pair<K, V>& value) {
			return value.first;
		}
	};
public:

	typedef typename HashBucket<K, pair<K, V>, MapKeyOfValue, HFun>::iterator iterator;

	iterator begin() {
		return _hb.begin();
	}

	iterator end() {
		return _hb.end();
	}

	V& operator[](const K& key) {
		pair<iterator, bool> ret = _hb.insert(make_pair(key, V()));
		iterator it = ret.first;
		return (*it).second;
	}

	bool insert(const pair<K, V> value) {
		return _hb.insert(value).second;
	}

private:
	HashBucket<K, pair<K, V>, MapKeyOfValue, HFun> _hb;
};

//模拟实现set
template<class K, class HFun = HashFun<K>>
class UnorderedSet {

	struct SetKeyOfValue {
		const K& operator()(const K& value) {
			return value;
		}
	};
public:
	typedef typename HashBucket<K, K, SetKeyOfValue, HFun>::iterator iterator;
	
	iterator begin() {
		return _hb.begin();
	}

	iterator end() {
		return _hb.end();
	}
	
	bool insert(const K& value) {
		return _hb.insert(value).second;
	}
private:
	HashBucket<K, K, SetKeyOfValue, HFun> _hb;
};

void test_map() {
	UnorderedMap<int, int> up;


	cout << "---test_unordered_map---" << endl;

	up.insert(make_pair(5, 1));
	up.insert(make_pair(4, 1));
	up.insert(make_pair(10, 1));
	up.insert(make_pair(2, 1));
	up.insert(make_pair(1, 1));

	up[5] = 10;
	up[44] = 44;

	//auto it = up.begin();
	for (auto it : up) {
		cout << it.first << "--->" << it.second << endl;
	}
}

void test_string_map() {

	cout << "---test key of string situation---" << endl;

	UnorderedMap<string, int> up1;

	up1.insert(make_pair("a", 1));
	up1.insert(make_pair("b", 1));
	up1.insert(make_pair("aa", 1));
	up1.insert(make_pair("bb", 1));
	up1.insert(make_pair("c", 1));

	up1["aa"] = 4;
	up1["a"] = 16;

	for (auto it : up1) {
		cout << it.first << "--->" << it.second << endl;
	}

	cout << endl;
}

void test_set() {

	cout << "---test_unordered_set---" << endl;

	UnorderedSet<int> set;

	set.insert(10);
	set.insert(9);
	set.insert(4);
	set.insert(5);
	set.insert(1);
	set.insert(9);

	for (auto it : set) {
		cout << it << endl;
	}
}


// Entry point of the hash-bucket example: runs the int map demo, the set demo
// and the string-key map demo, in that order, then exits with status 0.
int main()
{
	test_map();
	test_set();
	test_string_map();

	return 0;
}

GlorygloryGlory

发布了25 篇原创文章 · 获赞 16 · 访问量 914

私信关注