C++05容器(二) 用线性探测法实现的hash表,海量数据处理

1.用线性探测法实现的hash表

// Hash functor: maps a value of type T to an int hash code.
// The containers below reduce the code with `% bucketCount`
// (division-remainder method), so the functor itself does no reduction.
template<typename T>
class CHash
{
public:
	// Generic case: T must be implicitly convertible to int; the converted
	// value is used as the hash code. Custom key types (string, User,
	// People, ...) need their own specialization.
	// The call operator is const so the functor also works on const objects.
	int operator()(const T &val) const
	{
		return val;
	}
};

// Specialization for std::string.
template<>
class CHash<std::string>
{
public:
	// Order-sensitive polynomial hash, so anagrams such as "hello" and
	// "olleh" get different codes.
	// The previous `val[i] >> i` form was undefined once i reached the width
	// of int and ignored all but the first few characters.
	// Returns a non-negative int; the empty string hashes to 0.
	int operator()(const std::string &val) const
	{
		// Unsigned arithmetic: wrap-around is well defined.
		unsigned int sum = 0;
		for (std::string::size_type i = 0; i < val.length(); ++i)
		{
			sum = sum * 31u + static_cast<unsigned char>(val[i]);
		}
		// Drop the top bit so the result always fits a non-negative int.
		return static_cast<int>(sum & 0x7fffffffu);
	}
};

// Hash table using open addressing with linear probing.
// T        : element type (needs operator==, a default constructor and copy).
// HashType : functor returning an int hash code for a T.
// Duplicates are allowed: put() never checks whether the value is present.
template<typename T, typename HashType = CHash<T>>
class CHashTable
{
public:
	// size: initial number of slots (values below 1 are treated as 1 so the
	//       modulo in the probe sequence is never by zero).
	// lf  : load-factor threshold; the table grows when used/size >= lf.
	CHashTable(int size = 3, double lf = 0.75)
		:_loadFactor(lf), _usedBuckets(0)
	{
		_hashVec.resize(size > 0 ? size : 1);
	}

	// Insert val. Prints the current size and load factor, grows the table
	// when the threshold is reached, then stores val in the first slot of the
	// probe sequence that is not currently occupied.
	void put(const T &val)
	{
		double lf = _usedBuckets * 1.0 / _hashVec.size();
		std::cout << "size:" << _hashVec.size() << " loadfactor:" << lf << std::endl;
		// The second condition guards against an endless probe loop when the
		// caller passed a threshold above 1.0 and every slot is occupied.
		if (lf >= _loadFactor || _usedBuckets >= static_cast<int>(_hashVec.size()))
		{
			resize();
		}

		const size_t slotCount = _hashVec.size();
		for (size_t i = _hash(val) % slotCount;; i = (i + 1) % slotCount)
		{
			// Both never-used and deleted slots may be (re)used.
			if (_hashVec[i]._state != STATE_USE)
			{
				_hashVec[i]._data = val;
				_hashVec[i]._state = STATE_USE;
				_usedBuckets++;
				break;
			}
		}
	}

	// Remove one occurrence of val, if present. The slot is marked as deleted
	// (STATE_USED) so probe chains passing through it stay intact.
	void remove(const T &val)
	{
		const size_t slotCount = _hashVec.size();
		size_t i = _hash(val) % slotCount;
		// Visit each slot at most once.
		for (size_t probes = 0; probes < slotCount; ++probes, i = (i + 1) % slotCount)
		{
			// A never-used slot ends the probe chain: val is not stored.
			if (_hashVec[i]._state == STATE_UNUSE)
				return;

			if (_hashVec[i]._state == STATE_USE
				&& _hashVec[i]._data == val)
			{
				_hashVec[i]._state = STATE_USED;
				_usedBuckets--;
				return;
			}
		}
	}

	// Return true when val is stored in the table, false otherwise.
	bool query(const T &val)
	{
		const size_t slotCount = _hashVec.size();
		size_t i = _hash(val) % slotCount;
		for (size_t probes = 0; probes < slotCount; ++probes, i = (i + 1) % slotCount)
		{
			// A never-used slot ends the probe chain.
			if (_hashVec[i]._state == STATE_UNUSE)
				return false;
			if (_hashVec[i]._state == STATE_USE
				&& _hashVec[i]._data == val)
			{
				return true;
			}
		}
		return false;
	}

	// Print every slot as "index:value" (value omitted for slots that hold no
	// live element), followed by a newline. Requires operator<< for T.
	void display() const
	{
		for (size_t i = 0; i < _hashVec.size(); ++i)
		{
			std::cout << i << ":";
			if (_hashVec[i]._state == STATE_USE)
			{
				std::cout << _hashVec[i]._data;
			}
			std::cout << " ";
		}
		std::cout << std::endl;
	}

private:
	// Slot states: never used / holds a live element / element was deleted.
	enum STATE{ STATE_UNUSE, STATE_USE, STATE_USED };
	struct Node
	{
		Node(T data = T())
			:_data(data), _state(STATE_UNUSE)
		{}
		T _data;
		STATE _state;
	};

	std::vector<Node> _hashVec; // slot array
	double _loadFactor;         // growth threshold
	int _usedBuckets;           // number of slots in STATE_USE
	HashType _hash;             // computes the hash code of a T

	// Return the smallest prime strictly greater than n (for n >= 1).
	int getPrime(int n)
	{
		for (int candidate = n + 1;; ++candidate)
		{
			bool isPrime = true;
			// d <= candidate / d is d*d <= candidate without overflow.
			for (int d = 2; d <= candidate / d; ++d)
			{
				if (candidate % d == 0)
				{
					isPrime = false;
					break;
				}
			}
			if (isPrime)
			{
				return candidate;
			}
		}
	}

	// Grow to the next prime slot count and re-insert every live element.
	// Deleted markers are dropped in the process.
	void resize()
	{
		std::vector<Node> oldSlots;
		oldSlots.swap(_hashVec);
		_usedBuckets = 0;

		_hashVec.resize(getPrime(static_cast<int>(oldSlots.size())));

		for (size_t i = 0; i < oldSlots.size(); ++i)
		{
			if (oldSlots[i]._state == STATE_USE)
			{
				put(oldSlots[i]._data);
			}
		}
	}
};

int main()
{
	CHashTable<int> hash;
	srand(time(NULL));

	// Fill the table with 0..7; the random variant is kept for reference.
	int next = 0;
	while (next < 8)
	{
		//hash.put(rand() % 100 + 1);
		hash.put(next);
		++next;
	}

	// Put 2 again, remove it, put it once more, dumping the table each time.
	hash.put(2);
	hash.display();
	hash.remove(2);
	hash.display();
	hash.put(2);
	hash.display();

	// Report whether 3 and 2 can be found.
	const bool hasThree = hash.query(3);
	std::cout << hasThree << std::endl;
	const bool hasTwo = hash.query(2);
	std::cout << hasTwo << std::endl;
	return 0;
}

2.海量数据处理 top k 查重(哈希表)

int main()
{
	// Massive-data processing: top k (duplicate detection would use a hash table).
	const unsigned int total = 20000000;
	std::vector<unsigned int> vec;
	vec.reserve(total); // one allocation instead of repeated regrowth
	for (unsigned int i = 0; i < total; ++i)
	{
		vec.push_back(rand() + i);
	}

	// Goal: find and print the 10 smallest values as fast as possible.
	// Top-k largest uses a min-heap; top-k smallest uses a max-heap.
	// Cost: O(n * log2(k)) with k = 10.
	std::priority_queue<unsigned int> maxHeap; // same element type as vec
	const unsigned int k = 10;

	// Seed the heap with the first k values.
	unsigned int i = 0;
	for (; i < k && i < vec.size(); ++i)
	{
		maxHeap.push(vec[i]);
	}

	// Any later value smaller than the current maximum replaces it, so the
	// heap always holds the k smallest values seen so far.
	for (; i < vec.size(); ++i)
	{
		if (vec[i] < maxHeap.top())
		{
			maxHeap.pop();
			maxHeap.push(vec[i]);
		}
	}

	// Print the result, largest of the k first.
	while (!maxHeap.empty())
	{
		std::cout << maxHeap.top() << " ";
		maxHeap.pop();
	}
	std::cout << std::endl;

	return 0;
}

3. key 链地址法实现的哈希表结构 【key,id value Person】

// key 链地址法实现的哈希表结构 【key,id value Person】 
// Key/value pair stored in the chained hash map.
// Both fields are public; a default-constructed pair holds K() and V().
template<typename K, typename V>
struct MyPair
{
	// Arguments are taken by const reference so each field is copied exactly
	// once (taking them by value copied every argument twice).
	MyPair(const K &k = K(), const V &v = V())
		:first(k), second(v) {}
	K first; // key
	V second; // value
};
// Two pairs compare equal when their keys are equal; the mapped values are
// not compared.
template<typename K, typename V>
bool operator==(const MyPair<K, V> &left, const MyPair<K, V> &right)
{
	const bool sameKey = (left.first == right.first);
	return sameKey;
}

// Convenience factory: builds a MyPair<K, V>, deducing K and V from the
// arguments.
template<typename K, typename V>
MyPair<K, V> mymake_pair(const K &k, const V &v)
{
	typedef MyPair<K, V> PairType;
	return PairType(k, v);
}

template<typename K, typename V, typename HashType = CHash<K>>
class CHashMap // 映射表  key -> value
{
public:
	CHashMap(int size = 3, double lf = 0.75)
		:_loadFactor(lf), _usedBuckets(0)
	{
		// 给哈希表开辟数组空间的
		_hashVec.resize(size);
	}
	void put(const MyPair<K, V> &pair)
	{
		double lf = _usedBuckets * 1.0 / _hashVec.size();
		cout << "size:" << _hashVec.size() << " loadfactor:" << lf << endl;
		if (lf >= _loadFactor)
		{
			resize();
		}
		int index = _hash(pair.first) % _hashVec.size();
		list<MyPair<K,V>> &curList = _hashVec[index];
		if (curList.empty())
		{
			curList.push_front(pair);
			_usedBuckets++;
		}
		else
		{
			auto it = find(curList.begin(), curList.end(), pair);
			if (it == curList.end())
			{
				curList.push_front(pair);
			}
		}
	}
	// 删除哈希表中的元素
	void remove(const K &key)
	{
		int index = _hash(key) % _hashVec.size();
		list<MyPair<K, V>> &curList = _hashVec[index];
		if (!curList.empty())
		{
			// 1.在list怎么找val
			auto it = find(curList.begin(), curList.end(), MyPair<K,V>(key));
			// 2.找到val,删除,找不到,return
			if (it != curList.end())
			{
				curList.erase(it);
				// 3.删除val,如果桶变成空的,要给
				if (curList.empty())
				{
					_usedBuckets--;
				}
			}
		}
	}
	// 在哈希表中查找元素   table.query(10);    table[10]
	// map["3452346"] 1.查询功能 2.赋值功能 map["3452346"]="zhangsan" 3.增加
	V& operator[](const K &key)  // MyPair<K,V>
	{
		int index = _hash(key) % _hashVec.size();
		list<MyPair<K, V>> &curList = _hashVec[index];
		if (!curList.empty())
		{
			// 1.在list怎么找val
			auto it = find(curList.begin(), curList.end(), MyPair<K, V>(key));
			// 2.找到val,删除,找不到,return
			if (it != curList.end())
			{
				return it->second;
			}
		}
		// 没有找到
		curList.push_front(MyPair<K,V>(key));
		//return curList.insert(curList.begin(), MyPair<K, V>(key))->second;
		return curList.begin()->second;
	}
private:
	vector<list<MyPair<K, V>>> _hashVec;
	double _loadFactor; // 记录加载因子
	int _usedBuckets;
	HashType _hash;  // 专门计算T类型对象的哈希值的

	//获取素数
	int getPrime(int n)
	{
		for (int i = n + 1;; ++i)
		{
			int k = sqrt(i);
			int j = 2;
			for (; j <= k; ++j)
			{
				if (i % j == 0)
					break;
			}
			if (j > k)
			{
				return i;
			}
			else
			{
				continue;
			}
		}
	}

	// 动态扩容
	void resize()
	{
		// 1.先把现有的哈希表交换到老的容器当中
		vector<list<MyPair<K, V>>> _oldhash;
		_oldhash.swap(_hashVec);

		// 2.给_hashVec resize桶内存
		_hashVec.resize(getPrime(_oldhash.size()));

		// 3.遍历old哈希表,的每一个链表的节点,计算其散列码,splice到新的hash中
		for (auto it = _oldhash.begin(); // it -> list<int>
			it != _oldhash.end();
			++it)
		{
			if (!it->empty())
			{
				for (auto it1 = it->begin(); it1 != it->end();)
				{
					int index = _hash(it1->first) % _hashVec.size();
					list<MyPair<K, V>> &mylist = _hashVec[index];
					if (!mylist.empty())
					{
						// 4.如果桶被第一次占用,_usedBuckets++
						_usedBuckets++;
					}
					// 把旧的hash表中的list节点直接搬到新的哈希表当中
					mylist.splice(mylist.begin(), *it, it1);
					it1 = it->begin();
				}
			}
		}
	}
};
int main()
{
	CHashMap<int, int> hashMap;

	// Insert four key/value pairs through put().
	const int entries[4][2] = { {10, 20}, {14, 45}, {18, 73}, {19, 89} };
	for (int row = 0; row < 4; ++row)
	{
		hashMap.put(mymake_pair(entries[row][0], entries[row][1]));
	}

	// operator[] creates the mapping for 20, then the value is assigned.
	int &slot = hashMap[20];
	slot = 987;

	// Look up the value stored under key 10.
	std::cout << hashMap[10] << std::endl;

	return 0;
}

4.unordered_set us1;

int main()
{
	std::unordered_set<int> us1;
	std::unordered_multiset<int> us2;

	// 100 random values in [0, 20) go into the set; print how often 15 occurs.
	int inserted = 0;
	while (inserted < 100)
	{
		us1.insert(rand() % 20);
		++inserted;
	}
	std::cout << us1.count(15) << std::endl;

	// Same for the multiset.
	for (inserted = 0; inserted != 100; ++inserted)
	{
		us2.insert(rand() % 20);
	}
	std::cout << us2.count(15) << std::endl;

	us1.erase(15);

	// Print what is left in the set.
	for (const int value : us1)
	{
		std::cout << value << " ";
	}
	std::cout << std::endl;

	us2.find(23);

	// Print every element of the multiset.
	for (const int a : us2)
	{
		std::cout << a << " ";
	}

	std::vector<int> vec;
	std::unordered_multiset<int> us3;
	// 1. Walk over all elements of vec.
	// 2. Check whether the current element is already in us3; add it if not,
	//    and if it is,
	// 3. us3.count(val);

	return 0;
}

5.容器的空间配置器

/*
容器的空间配置器allocator
目的:把对象的内存开辟,和对象构造分开
     把对象的析构,和内存释放分开
	 class allocator

	 construct : 构造 如何在一个存在的内存上构造对象
	 destroy : 析构  如何只调用对象的析构函数

	 allocate : 开辟内存   malloc
	 deallocate : 释放内存  free
*/
// Container allocator: keeps raw memory management (allocate/deallocate)
// separate from object lifetime (construct/destroy).
template<typename T>
class Allocator
{
public:
	// Obtain `size` BYTES of raw storage from malloc; no constructor runs.
	// The malloc result is returned unchecked.
	T* allocate(size_t size)
	{
		void *raw = malloc(size);
		return static_cast<T*>(raw);
	}

	// Hand storage back to free; no destructor runs.
	void deallocate(void *ptr)
	{
		free(ptr);
	}

	// Copy-construct a T from val in the storage at ptr (placement new).
	void construct(T *ptr, const T &val)
	{
		new (ptr) T(val);
	}

	// Run the destructor of the object at ptr; the storage is kept.
	void destroy(T *ptr)
	{
		(*ptr).~T();
	}
};

// Dynamic array that separates memory allocation from object construction
// through an allocator exposing allocate(bytes) / deallocate(ptr) /
// construct(ptr, val) / destroy(ptr).
// Layout: [_first, _last) holds live elements, [_last, _end) is raw storage.
// Note: push_back/insert take val by reference; val must not refer to an
// element of this Vector, because growth or shifting may destroy it first.
template<typename T,
		typename allocator = Allocator<T>>
class Vector
{
public:
	// Reserve room for `size` elements without constructing any.
	// A size of 0 (or less) allocates nothing.
	Vector(int size = 0)
	{
		initStorage(size);
	}
	// Construct `size` elements, each a copy of val.
	Vector(int size, const T &val)
	{
		initStorage(size);
		for (int i = 0; i < size; ++i)
		{
			mAllocator.construct(_last._ptr, val);
			++_last._ptr;
		}
	}
	// Construct from the elements of the range [first, last).
	Vector(T *first, T *last)
	{
		initStorage(static_cast<int>(last - first));
		for (; first < last; ++first)
		{
			mAllocator.construct(_last._ptr, *first);
			++_last._ptr;
		}
	}
	// Deep copy. Without it the implicit copy would share the buffer and the
	// two destructors would free it twice.
	Vector(const Vector &other)
		:mAllocator(other.mAllocator)
	{
		initStorage(other.size());
		for (T *src = other._first._ptr; src < other._last._ptr; ++src)
		{
			mAllocator.construct(_last._ptr, *src);
			++_last._ptr;
		}
	}
	// Copy assignment: build a copy, then exchange buffers with it; the old
	// buffer is released when the temporary is destroyed.
	Vector& operator=(const Vector &other)
	{
		if (this != &other)
		{
			Vector tmp(other);
			T *p = _first._ptr; _first._ptr = tmp._first._ptr; tmp._first._ptr = p;
			p = _last._ptr; _last._ptr = tmp._last._ptr; tmp._last._ptr = p;
			p = _end._ptr; _end._ptr = tmp._end._ptr; tmp._end._ptr = p;
		}
		return *this;
	}
	~Vector()
	{
		// Destroy the live elements ...
		for (T *p = _first._ptr; p < _last._ptr; ++p)
		{
			mAllocator.destroy(p);
		}
		// ... then release the storage.
		mAllocator.deallocate(_first._ptr);
	}
	// Append a copy of val, growing the storage when it is full.
	void push_back(const T &val)
	{
		if (full())
			resize();
		mAllocator.construct(_last._ptr, val);
		_last._ptr++;
	}
	// Remove the last element; does nothing on an empty Vector.
	void pop_back()
	{
		if (empty())
			return;
		--_last._ptr;
		mAllocator.destroy(_last._ptr);
	}
	bool full()const { return _last == _end; }
	bool empty()const { return _last == _first; }
	// Number of live elements.
	int size()const { return _last - _first; }

	// Iterator over the elements: a thin wrapper around T*.
	class iterator
	{
	public:
		// The enclosing Vector (with whatever allocator it uses) needs
		// access to _ptr.
		friend class Vector<T, allocator>;

		iterator(T *p = nullptr)
			:_ptr(p) {}
		bool operator!=(const iterator &it)const
		{
			return _ptr != it._ptr;
		}
		bool operator==(const iterator &it)const
		{
			return _ptr == it._ptr;
		}
		// Distance in elements between two iterators.
		int operator-(const iterator &it)const
		{
			return _ptr - it._ptr;
		}
		iterator& operator++() { _ptr++; return *this; }
		iterator& operator--() { _ptr--; return *this; }
		T& operator*() const { return *_ptr; }
	private:
		T *_ptr;
	};
	iterator begin() { return iterator(_first._ptr); }
	iterator end() { return iterator(_last._ptr); }

	// Insert a copy of val before the element `it` refers to and return an
	// iterator to the new element. `it` must lie in [begin(), end()].
	iterator insert(iterator it, const T &val)
	{
		// Work with an index: growth replaces the buffer and would leave
		// it._ptr dangling.
		const int pos = static_cast<int>(it._ptr - _first._ptr);
		if (full())
		{
			resize();
		}

		// Shift [pos, size) one slot to the right, back to front.
		for (int i = size(); i > pos; --i)
		{
			mAllocator.construct(_first._ptr + i, _first._ptr[i - 1]);
			mAllocator.destroy(_first._ptr + i - 1);
		}
		mAllocator.construct(_first._ptr + pos, val);
		++_last._ptr;
		return iterator(_first._ptr + pos);
	}

	// Erase the element `it` refers to and return an iterator to the element
	// that followed it (same position). An iterator outside [begin(), end())
	// is returned unchanged and nothing is erased.
	iterator erase(iterator it)
	{
		if (it._ptr < _first._ptr || it._ptr >= _last._ptr)
		{
			return it;
		}

		// Shift the tail one slot to the left; stop before the last element
		// so nothing past _last is ever read.
		T *tail = _last._ptr - 1;
		for (T *p = it._ptr; p < tail; ++p)
		{
			mAllocator.destroy(p);
			mAllocator.construct(p, *(p + 1));
		}
		mAllocator.destroy(tail);
		--_last._ptr;
		return it;
	}
private:
	iterator _first; // start of the storage
	iterator _last; // one past the last live element
	iterator _end; // one past the end of the storage
	allocator mAllocator;  // the container's allocator

	// Set up empty storage with room for `capacity` elements
	// (no allocation when capacity <= 0).
	void initStorage(int capacity)
	{
		if (capacity <= 0)
		{
			_first._ptr = _last._ptr = _end._ptr = nullptr;
		}
		else
		{
			_first._ptr = mAllocator.allocate(capacity * sizeof(T));
			_last._ptr = _first._ptr;
			_end._ptr = _first._ptr + capacity;
		}
	}

	// Double the capacity (0 grows to 1): 0 - 1 - 2 - 4 - 8 - ...
	void resize()
	{
		const int count = static_cast<int>(_last._ptr - _first._ptr);
		const int oldCapacity = static_cast<int>(_end._ptr - _first._ptr);
		// Always grow by at least one slot, even from a zero-capacity buffer.
		const int newCapacity = oldCapacity > 0 ? 2 * oldCapacity : 1;

		T *ptmp = mAllocator.allocate(newCapacity * sizeof(T));
		for (int i = 0; i < count; ++i)
		{
			mAllocator.construct(ptmp + i, _first._ptr[i]);
		}
		for (int i = 0; i < count; ++i)
		{
			mAllocator.destroy(_first._ptr + i);
		}
		if (_first._ptr != nullptr)
		{
			mAllocator.deallocate(_first._ptr);
		}

		_first._ptr = ptmp;
		_last._ptr = ptmp + count;
		_end._ptr = ptmp + newCapacity;
	}
};

// Tracing helper: logs every construction, copy and destruction so the
// container's object-lifetime handling can be observed. Owns a small heap
// array to make leaks and double frees visible.
class A
{
public:
	// Allocates the (zero-initialized) array and logs "A()".
	A() :p(new int[2]()) { std::cout << "A()" << std::endl; }
	// Deep copy: the new object gets its own array (previously p was left
	// uninitialized). Logs "A(const A&)".
	A(const A &src) :p(new int[2])
	{
		p[0] = src.p[0];
		p[1] = src.p[1];
		std::cout << "A(const A&)" << std::endl;
	}
	// Copies the array contents; each object keeps its own array.
	A& operator=(const A &src)
	{
		if (this != &src)
		{
			p[0] = src.p[0];
			p[1] = src.p[1];
		}
		return *this;
	}
	// Releases the array (previously leaked) and logs "~A()".
	~A()
	{
		delete[] p;
		std::cout << "~A()" << std::endl;
	}
private:
	int *p; // owned array of two ints
};
int main()
{
	Vector<int> vec;
	for (int i = 0; i < 20; ++i)
	{
		vec.push_back(rand() % 100);
	}
	// foreach遍历
	for (int val : vec)
	{
		cout << val << " ";
	}
	cout << endl;

	Vector<int>::iterator it1 = vec.begin();
	for (; it1 != vec.end(); ++it1)
	{
		cout << *it1 << " ";
	}
	cout << endl;

	vec.insert(vec.begin(), 100);

	it1 = vec.begin();
	for (; it1 != vec.end(); ++it1)
	{
		cout << *it1 << " ";
	}
	cout << endl;

	vec.erase(vec.begin());

	it1 = vec.begin();
	for (; it1 != vec.end(); ++it1)
	{
		cout << *it1 << " ";
	}
	cout << endl;
#if 0
	A a1, a2, a3;
	cout << "------------" << endl;

	// 这里只需要空间,不需要构造对象  malloc
	Vector<A> vec(100);
	vec.push_back(a1);
	vec.push_back(a2);
	vec.pop_back();
	vec.push_back(a3);




	Vector<int> vec1; // 底层不开辟空间
	//vec1.push_back(10); // 0 - 1 - 2 - 4 - 8 - 16 - 32 - 64 - 128
	//vec1.push_back(20);
	for (int i = 0; i < 20; ++i)
	{
		vec1.push_back(rand() % 100 + 1);
	}
	cout << vec1.size() << endl;
	
	// 用通用的迭代器遍历方式,遍历vec1,并打印容器中所有的元素值
	Vector<int>::iterator it1 = vec1.begin();
	for (; it1 != vec1.end(); ++it1)
	{
		cout << *it1 << " ";
	}
	cout << endl;

	Vector<int> vec2(10, 20);
	int arr[] = { 12,4,56,7,89 };
	Vector<int> vec3(arr, arr + sizeof(arr) / sizeof(arr[0]));
#endif
	return 0;
}

猜你喜欢

转载自blog.csdn.net/sunshinecandy/article/details/89280575