1.用线性探测法实现的hash表
// Default hash functor: turns a key into an int hash code.
// The primary template just converts the key to int, so it only fits key
// types that are implicitly convertible to int. Other key types
// (string, User, People, ...) need their own specialization.
template<typename T>
class CHash
{
public:
    // Returns the hash code of val, i.e. val converted to int.
    // The call operator is const so the functor also works through const
    // objects and const references.
    int operator()(const T &val) const
    {
        return val;
    }
}; // string User People
// Hash functor specialization for string keys.
template<>
class CHash<string>
{
public:
    // Returns a non-negative, position-sensitive hash code for val, so that
    // anagrams such as "hello" and "olleh" produce different codes.
    int operator()(const string &val) const
    {
        // Polynomial rolling hash in unsigned arithmetic: wrap-around is well
        // defined and every character contributes. Shifting a character by
        // its index instead would be undefined behaviour as soon as the index
        // reaches the width of int, and would discard most characters.
        unsigned int code = 0;
        for (unsigned char ch : val)
        {
            code = code * 31u + ch;
        }
        // Clear the sign bit so callers never see a negative hash code.
        return static_cast<int>(code & 0x7fffffffu);
    }
};
// Hash table that resolves collisions by linear probing (open addressing).
//
// T        - element type; must be default-constructible, copyable and
//            comparable with ==.
// HashType - functor returning an int hash code for a T.
//
// Every bucket is in one of three states:
//   STATE_UNUSE - never held an element (a probe sequence stops here),
//   STATE_USE   - currently holds an element,
//   STATE_USED  - held an element that was removed (probing continues past it,
//                 and put() may reuse it).
// Duplicates are allowed: put() always stores another copy.
template<typename T, typename HashType= CHash<T>>
class CHashTable
{
public:
    // size - initial number of buckets (values below 1 are raised to 1 so the
    //        modulo arithmetic never divides by zero).
    // lf   - load factor threshold; put() grows the table once
    //        used buckets / bucket count reaches it.
    CHashTable(int size = 3, double lf = 0.75)
        :_loadFactor(lf), _usedBuckets(0)
    {
        _hashVec.resize(size > 0 ? size : 1);
    }

    // Inserts a copy of val, growing the table first when the load factor
    // threshold has been reached. Prints a one-line trace of the current
    // bucket count and load factor.
    void put(const T &val)
    {
        double lf = _usedBuckets * 1.0 / _hashVec.size();
        cout << "size:"<< _hashVec.size() << " loadfactor:" << lf << endl;
        // Also grow when every bucket is live: with a threshold above 1.0 the
        // probe loop would otherwise never find a free bucket.
        if (lf >= _loadFactor
            || _usedBuckets >= static_cast<int>(_hashVec.size()))
        {
            resize();
        }
        insertNoGrow(val);
    }

    // Removes the first element equal to val along its probe sequence, if any.
    void remove(const T &val)
    {
        Node *node = locate(val);
        if (node != nullptr)
        {
            // Leave a tombstone so later probes keep walking past this bucket.
            node->_state = STATE_USED;
            _usedBuckets--;
        }
    }

    // Returns true when an element equal to val is stored in the table.
    bool query(const T &val)
    {
        return locate(val) != nullptr;
    }

    // Prints every stored element in bucket order, each followed by a space,
    // then ends the line.
    void display()
    {
        for (const Node &node : _hashVec)
        {
            if (node._state == STATE_USE)
            {
                cout << node._data << " ";
            }
        }
        cout << endl;
    }

private:
    // Bucket states; see the class comment.
    enum STATE{ STATE_UNUSE, STATE_USE, STATE_USED };

    struct Node
    {
        Node(T data = T())
            :_data(data), _state(STATE_UNUSE)
        {}
        T _data;
        STATE _state;
    };

    vector<Node> _hashVec;  // bucket array
    double _loadFactor;     // load factor threshold that triggers growth
    int _usedBuckets;       // number of buckets in STATE_USE
    HashType _hash;         // computes the hash code of a T

    // Index of the bucket where the probe sequence for val starts.
    size_t homeBucket(const T &val)
    {
        return _hash(val) % _hashVec.size();
    }

    // Walks the probe sequence of val and returns the bucket that holds it,
    // or nullptr when a never-used bucket is reached or every bucket has been
    // inspected once.
    Node* locate(const T &val)
    {
        const size_t bucketCount = _hashVec.size();
        const size_t start = homeBucket(val);
        size_t i = start;
        do
        {
            Node &node = _hashVec[i];
            if (node._state == STATE_UNUSE)
            {
                return nullptr;
            }
            if (node._state == STATE_USE && node._data == val)
            {
                return &node;
            }
            i = (i + 1) % bucketCount;
        } while (i != start);
        return nullptr;
    }

    // Stores val in the first bucket of its probe sequence that is not live.
    // The caller guarantees that at least one such bucket exists.
    void insertNoGrow(const T &val)
    {
        const size_t bucketCount = _hashVec.size();
        for (size_t i = homeBucket(val);; i = (i + 1) % bucketCount)
        {
            Node &node = _hashVec[i];
            if (node._state != STATE_USE)
            {
                node._data = val;
                node._state = STATE_USE;
                _usedBuckets++;
                return;
            }
        }
    }

    // Returns the smallest prime strictly greater than n (at least 2).
    int getPrime(int n)
    {
        for (int candidate = (n + 1 < 2) ? 2 : n + 1;; ++candidate)
        {
            bool isPrime = true;
            // Trial division up to sqrt(candidate), written with integers only.
            for (int divisor = 2; divisor * divisor <= candidate; ++divisor)
            {
                if (candidate % divisor == 0)
                {
                    isPrime = false;
                    break;
                }
            }
            if (isPrime)
            {
                return candidate;
            }
        }
    }

    // Grows the bucket array to the next prime size and re-inserts every live
    // element. Tombstones are dropped. Elements are re-inserted directly, so
    // growing neither prints a trace line per element nor re-enters resize().
    void resize()
    {
        vector<Node> oldBuckets;
        oldBuckets.swap(_hashVec);
        _usedBuckets = 0;
        _hashVec.resize(getPrime(static_cast<int>(oldBuckets.size())));
        for (const Node &node : oldBuckets)
        {
            if (node._state == STATE_USE)
            {
                insertNoGrow(node._data);
            }
        }
    }
};
int main()
{
CHashTable<int> hash;
srand(time(NULL));
for (int i = 0; i < 8; i++)
{
//hash.put(rand() % 100 + 1);
hash.put(i);
}
hash.put(2);
hash.display();
hash.remove(2);
hash.display();
hash.put(2);
hash.display();
cout << hash.query(3) << endl;
cout << hash.query(2) << endl;
return 0;
}
2.海量数据处理 top k 查重(哈希表)
int main()
{
    // Massive-data top-k demo: print the 10 smallest of 20,000,000 integers.
    const unsigned int total = 20000000;
    vector<unsigned int> vec;
    vec.reserve(total); // one allocation instead of repeated regrowth
    for (unsigned int i = 0; i < total; ++i)
    {
        vec.push_back(rand() + i);
    }

    // Top-k largest uses a min-heap; top-k smallest uses a max-heap.
    // The heap never holds more than k elements, so the scan costs
    // O(n * log2(k)).
    // The heap's element type matches the data, so large values are not
    // reinterpreted as negative numbers.
    priority_queue<unsigned int> maxHeap;
    const size_t k = 10;

    // Seed the heap with the first k elements (fewer if the input is shorter).
    for (size_t i = 0; i < k && i < vec.size(); ++i)
    {
        maxHeap.push(vec[i]);
    }

    // Every later element that beats the current worst candidate replaces it.
    for (size_t i = k; i < vec.size(); ++i)
    {
        if (vec[i] < maxHeap.top())
        {
            maxHeap.pop();
            maxHeap.push(vec[i]);
        }
    }

    // Print the k smallest values, largest of them first.
    while (!maxHeap.empty())
    {
        cout << maxHeap.top() << " ";
        maxHeap.pop();
    }
    cout << endl;
    return 0;
}
3. key 链地址法实现的哈希表结构 【key,id value Person】
// key 链地址法实现的哈希表结构 【key,id value Person】
// Key/value record stored in the buckets of the chained hash map.
// Both constructor arguments are optional and default to value-initialized
// objects.
template<typename K, typename V>
struct MyPair
{
    K first;  // key
    V second; // value

    MyPair(K k = K(), V v = V())
        : first(k)
        , second(v)
    {
    }
};
// Two pairs compare equal when their keys are equal; the values are ignored.
template<typename K, typename V>
bool operator==(const MyPair<K, V> &lhs, const MyPair<K, V> &rhs)
{
    const K &leftKey = lhs.first;
    const K &rightKey = rhs.first;
    return leftKey == rightKey;
}
// Convenience factory: builds a MyPair while deducing K and V from the
// arguments.
template<typename K, typename V>
MyPair<K, V> mymake_pair(const K &key, const V &val)
{
    MyPair<K, V> result(key, val);
    return result;
}
template<typename K, typename V, typename HashType = CHash<K>>
class CHashMap // 映射表 key -> value
{
public:
CHashMap(int size = 3, double lf = 0.75)
:_loadFactor(lf), _usedBuckets(0)
{
// 给哈希表开辟数组空间的
_hashVec.resize(size);
}
void put(const MyPair<K, V> &pair)
{
double lf = _usedBuckets * 1.0 / _hashVec.size();
cout << "size:" << _hashVec.size() << " loadfactor:" << lf << endl;
if (lf >= _loadFactor)
{
resize();
}
int index = _hash(pair.first) % _hashVec.size();
list<MyPair<K,V>> &curList = _hashVec[index];
if (curList.empty())
{
curList.push_front(pair);
_usedBuckets++;
}
else
{
auto it = find(curList.begin(), curList.end(), pair);
if (it == curList.end())
{
curList.push_front(pair);
}
}
}
// 删除哈希表中的元素
void remove(const K &key)
{
int index = _hash(key) % _hashVec.size();
list<MyPair<K, V>> &curList = _hashVec[index];
if (!curList.empty())
{
// 1.在list怎么找val
auto it = find(curList.begin(), curList.end(), MyPair<K,V>(key));
// 2.找到val,删除,找不到,return
if (it != curList.end())
{
curList.erase(it);
// 3.删除val,如果桶变成空的,要给
if (curList.empty())
{
_usedBuckets--;
}
}
}
}
// 在哈希表中查找元素 table.query(10); table[10]
// map["3452346"] 1.查询功能 2.赋值功能 map["3452346"]="zhangsan" 3.增加
V& operator[](const K &key) // MyPair<K,V>
{
int index = _hash(key) % _hashVec.size();
list<MyPair<K, V>> &curList = _hashVec[index];
if (!curList.empty())
{
// 1.在list怎么找val
auto it = find(curList.begin(), curList.end(), MyPair<K, V>(key));
// 2.找到val,删除,找不到,return
if (it != curList.end())
{
return it->second;
}
}
// 没有找到
curList.push_front(MyPair<K,V>(key));
//return curList.insert(curList.begin(), MyPair<K, V>(key))->second;
return curList.begin()->second;
}
private:
vector<list<MyPair<K, V>>> _hashVec;
double _loadFactor; // 记录加载因子
int _usedBuckets;
HashType _hash; // 专门计算T类型对象的哈希值的
//获取素数
int getPrime(int n)
{
for (int i = n + 1;; ++i)
{
int k = sqrt(i);
int j = 2;
for (; j <= k; ++j)
{
if (i % j == 0)
break;
}
if (j > k)
{
return i;
}
else
{
continue;
}
}
}
// 动态扩容
void resize()
{
// 1.先把现有的哈希表交换到老的容器当中
vector<list<MyPair<K, V>>> _oldhash;
_oldhash.swap(_hashVec);
// 2.给_hashVec resize桶内存
_hashVec.resize(getPrime(_oldhash.size()));
// 3.遍历old哈希表,的每一个链表的节点,计算其散列码,splice到新的hash中
for (auto it = _oldhash.begin(); // it -> list<int>
it != _oldhash.end();
++it)
{
if (!it->empty())
{
for (auto it1 = it->begin(); it1 != it->end();)
{
int index = _hash(it1->first) % _hashVec.size();
list<MyPair<K, V>> &mylist = _hashVec[index];
if (!mylist.empty())
{
// 4.如果桶被第一次占用,_usedBuckets++
_usedBuckets++;
}
// 把旧的hash表中的list节点直接搬到新的哈希表当中
mylist.splice(mylist.begin(), *it, it1);
it1 = it->begin();
}
}
}
}
};
int main()
{
CHashMap<int, int> hashMap;
hashMap.put(mymake_pair(10, 20));
hashMap.put(mymake_pair(14, 45));
hashMap.put(mymake_pair(18, 73));
hashMap.put(mymake_pair(19, 89));
hashMap[20] = 987;
cout << hashMap[10] << endl;
return 0;
}
4.unordered_set us1;
int main()
{
    // Demo of unordered_set (unique keys) versus unordered_multiset
    // (duplicate keys allowed).
    unordered_set<int> us1;
    unordered_multiset<int> us2;

    // Fill the set with 100 random values in [0, 20) and count the 15s.
    int inserted = 0;
    while (inserted < 100)
    {
        us1.insert(rand() % 20);
        ++inserted;
    }
    cout << us1.count(15) << endl;

    // Same for the multiset.
    inserted = 0;
    while (inserted < 100)
    {
        us2.insert(rand() % 20);
        ++inserted;
    }
    cout << us2.count(15) << endl;

    // Erase 15 from the set and print what is left.
    us1.erase(15);
    for (int value : us1)
    {
        cout << value << " ";
    }
    cout << endl;

    // Lookup whose result is discarded.
    us2.find(23);

    // Print every element of the multiset.
    for (auto it = us2.begin(); it != us2.end(); ++it)
    {
        cout << *it << " ";
    }

    vector<int> vec;
    unordered_multiset<int> us3;
    // Outline for a duplicate-detection exercise:
    // 1. Walk through every element of vec.
    // 2. Check whether the element is already in us3; add it if not.
    // 3. Use us3.count(val) for elements that are already present.
    return 0;
}
5.容器的空间配置器
/*
容器的空间配置器allocator
目的:把对象的内存开辟,和对象构造分开
把对象的析构,和内存释放分开
class allocator
construct : 构造 如何在一个存在的内存上构造对象
destroy : 析构 如何只调用对象的析构函数
allocate : 开辟内存 malloc
deallocate : 释放内存 free
*/
// Container allocator: keeps raw memory management apart from object
// construction and destruction.
template<typename T>
class Allocator
{
public:
    // Obtains `size` bytes of raw memory from malloc; no object is constructed.
    T* allocate(size_t size)
    {
        void *raw = malloc(size);
        return static_cast<T*>(raw);
    }

    // Hands memory obtained from allocate() back to free(); no destructor runs.
    void deallocate(void *ptr)
    {
        free(ptr);
    }

    // Copy-constructs a T from val in the existing storage at ptr.
    void construct(T *ptr, const T &val)
    {
        new (ptr) T(val);
    }

    // Runs the destructor of the object at ptr without releasing its storage.
    void destroy(T *ptr)
    {
        ptr->~T();
    }
};
// Growable array that keeps memory allocation apart from object lifetime by
// delegating both to an allocator object.
//
// T         - element type; must be copy-constructible.
// allocator - type providing allocate(bytes), deallocate(ptr),
//             construct(ptr, value) and destroy(ptr).
//
// Storage layout: [_first, _last) holds constructed elements and
// [_last, _end) is raw, unconstructed capacity.
template<typename T,
         typename allocator = Allocator<T>>
class Vector
{
public:
    // Reserves room for `size` elements without constructing any.
    // A size of zero or less allocates nothing.
    Vector(int size = 0)
    {
        if (size <= 0)
        {
            _first._ptr = _last._ptr = _end._ptr = nullptr;
        }
        else
        {
            _first._ptr = mAllocator.allocate(size * sizeof(T));
            _last._ptr = _first._ptr;
            _end._ptr = _first._ptr + size;
        }
    }

    // Builds a vector holding `size` copies of val.
    Vector(int size, const T &val)
    {
        if (size <= 0)
        {
            _first._ptr = _last._ptr = _end._ptr = nullptr;
            return;
        }
        _first._ptr = mAllocator.allocate(size * sizeof(T));
        for (int i = 0; i < size; ++i)
        {
            mAllocator.construct(_first._ptr + i, val);
        }
        _last._ptr = _end._ptr = _first._ptr + size;
    }

    // Builds a vector from the elements in [first, last).
    Vector(T *first, T *last)
    {
        copyRange(first, static_cast<int>(last - first));
    }

    // Deep copy: the new vector owns its own storage, so destroying both
    // vectors does not release the same buffer twice.
    Vector(const Vector &other)
        :mAllocator(other.mAllocator)
    {
        copyRange(other._first._ptr, other.size());
    }

    // Copy assignment: build the copy first, then take over its storage.
    // The old contents are released when the temporary is destroyed.
    Vector& operator=(const Vector &other)
    {
        if (this != &other)
        {
            Vector copy(other);
            swapWith(copy);
        }
        return *this;
    }

    ~Vector()
    {
        // Destroy the live elements, then release the raw memory.
        for (T *p = _first._ptr; p < _last._ptr; ++p)
        {
            mAllocator.destroy(p);
        }
        mAllocator.deallocate(_first._ptr);
    }

    // Appends a copy of val, growing the storage when it is full.
    void push_back(const T &val)
    {
        if (full())
        {
            // val may refer to an element of this vector; copy it before the
            // old storage is released.
            T saved(val);
            resize();
            mAllocator.construct(_last._ptr, saved);
        }
        else
        {
            mAllocator.construct(_last._ptr, val);
        }
        _last._ptr++;
    }

    // Removes the last element; does nothing on an empty vector.
    void pop_back()
    {
        if (empty())
            return;
        --_last._ptr;
        mAllocator.destroy(_last._ptr);
    }

    bool full()const { return _last == _end; }
    bool empty()const { return _last == _first; }
    // Number of elements currently stored.
    int size()const { return _last - _first; }

    // Iterator over the elements of a Vector (a thin wrapper around T*).
    class iterator
    {
    public:
        // The enclosing Vector, whatever its allocator, manipulates _ptr
        // directly.
        friend class Vector<T, allocator>;
        iterator(T *p = nullptr)
            :_ptr(p) {}
        bool operator!=(const iterator &it)const
        {
            return _ptr != it._ptr;
        }
        bool operator==(const iterator &it)const
        {
            return _ptr == it._ptr;
        }
        // Distance in elements between two iterators.
        int operator-(const iterator &it)const
        {
            return _ptr - it._ptr;
        }
        iterator& operator++() { ++_ptr; return *this; }
        iterator& operator--() { --_ptr; return *this; }
        T& operator*()const { return *_ptr; }
    private:
        T *_ptr;
    };

    iterator begin() { return iterator(_first._ptr); }
    iterator end() { return iterator(_first._ptr + size()); }

    // Inserts a copy of val before the position it designates and returns an
    // iterator to the new element. Iterators obtained earlier are invalid
    // afterwards because the storage may have been reallocated.
    iterator insert(iterator it, const T &val)
    {
        // Remember the position as an index, clamped to [0, size()], so it
        // survives a reallocation.
        int offset = static_cast<int>(it._ptr - _first._ptr);
        if (offset < 0)
            offset = 0;
        if (offset > size())
            offset = size();
        // val may refer to an element that is about to move.
        T saved(val);
        if (full())
        {
            resize();
        }
        T *pos = _first._ptr + offset;
        // Shift [pos, _last) one slot to the right, back to front.
        for (T *p = _last._ptr; p > pos; --p)
        {
            mAllocator.construct(p, *(p - 1));
            mAllocator.destroy(p - 1);
        }
        mAllocator.construct(pos, saved);
        ++_last._ptr;
        return iterator(pos);
    }

    // Erases the element it designates. The returned iterator has the same
    // position, which now designates the element that followed the erased one
    // (or end()); callers should continue from it. An iterator outside
    // [begin(), end()) is returned unchanged.
    iterator erase(iterator it)
    {
        if (it._ptr == nullptr
            || it._ptr < _first._ptr || it._ptr >= _last._ptr)
        {
            return it;
        }
        T *lastElem = _last._ptr - 1;
        // Shift the tail one slot to the left; stop before the final element
        // so nothing is read past the constructed range.
        for (T *p = it._ptr; p < lastElem; ++p)
        {
            mAllocator.destroy(p);
            mAllocator.construct(p, *(p + 1));
        }
        mAllocator.destroy(lastElem);
        _last._ptr = lastElem;
        return it;
    }

private:
    iterator _first;      // start of the storage
    iterator _last;       // one past the last constructed element
    iterator _end;        // one past the end of the storage
    allocator mAllocator; // allocator used for memory and object lifetime

    // Initializes this vector with copies of src[0 .. count). Only meant to be
    // called from constructors, before any storage is owned.
    void copyRange(const T *src, int count)
    {
        if (count <= 0)
        {
            _first._ptr = _last._ptr = _end._ptr = nullptr;
            return;
        }
        _first._ptr = mAllocator.allocate(count * sizeof(T));
        for (int i = 0; i < count; ++i)
        {
            mAllocator.construct(_first._ptr + i, src[i]);
        }
        _last._ptr = _end._ptr = _first._ptr + count;
    }

    // Exchanges storage and allocator with other.
    void swapWith(Vector &other)
    {
        T *tmp = _first._ptr;
        _first._ptr = other._first._ptr;
        other._first._ptr = tmp;

        tmp = _last._ptr;
        _last._ptr = other._last._ptr;
        other._last._ptr = tmp;

        tmp = _end._ptr;
        _end._ptr = other._end._ptr;
        other._end._ptr = tmp;

        allocator tmpAlloc = mAllocator;
        mAllocator = other.mAllocator;
        other.mAllocator = tmpAlloc;
    }

    // Doubles the capacity (0 -> 1 -> 2 -> 4 ...), copying the elements into
    // the new storage before the old ones are destroyed.
    void resize()
    {
        const int count = size();
        const int capacity = static_cast<int>(_end._ptr - _first._ptr);
        // A zero-capacity buffer, whether null or not, grows to one slot.
        const int newCapacity = capacity > 0 ? capacity * 2 : 1;
        T *ptmp = mAllocator.allocate(newCapacity * sizeof(T));
        for (int i = 0; i < count; ++i)
        {
            mAllocator.construct(ptmp + i, _first._ptr[i]);
        }
        for (int i = 0; i < count; ++i)
        {
            mAllocator.destroy(_first._ptr + i);
        }
        mAllocator.deallocate(_first._ptr);
        _first._ptr = ptmp;
        _last._ptr = ptmp + count;
        _end._ptr = ptmp + newCapacity;
    }
};
// Tracing helper: announces every construction, copy and destruction on cout
// so the demo shows when a container creates and destroys elements. It owns a
// two-int heap array.
class A
{
public:
    // Allocates the (zero-initialized) array.
    A() :p(new int[2]()) { cout << "A()" << endl; }
    // Deep copy: every object owns its own array, so the pointer is never
    // left uninitialized or shared.
    A(const A &src) :p(new int[2])
    {
        p[0] = src.p[0];
        p[1] = src.p[1];
        cout << "A(const A&)" << endl;
    }
    // Copies the array contents; both objects keep their own storage.
    A& operator=(const A &src)
    {
        if (this != &src)
        {
            p[0] = src.p[0];
            p[1] = src.p[1];
        }
        return *this;
    }
    // Releases the array.
    ~A()
    {
        delete[] p;
        cout << "~A()" << endl;
    }
private:
    int *p; // owned array of two ints
};
int main()
{
Vector<int> vec;
for (int i = 0; i < 20; ++i)
{
vec.push_back(rand() % 100);
}
// foreach遍历
for (int val : vec)
{
cout << val << " ";
}
cout << endl;
Vector<int>::iterator it1 = vec.begin();
for (; it1 != vec.end(); ++it1)
{
cout << *it1 << " ";
}
cout << endl;
vec.insert(vec.begin(), 100);
it1 = vec.begin();
for (; it1 != vec.end(); ++it1)
{
cout << *it1 << " ";
}
cout << endl;
vec.erase(vec.begin());
it1 = vec.begin();
for (; it1 != vec.end(); ++it1)
{
cout << *it1 << " ";
}
cout << endl;
#if 0
A a1, a2, a3;
cout << "------------" << endl;
// 这里只需要空间,不需要构造对象 malloc
Vector<A> vec(100);
vec.push_back(a1);
vec.push_back(a2);
vec.pop_back();
vec.push_back(a3);
Vector<int> vec1; // 底层不开辟空间
//vec1.push_back(10); // 0 - 1 - 2 - 4 - 8 - 16 - 32 - 64 - 128
//vec1.push_back(20);
for (int i = 0; i < 20; ++i)
{
vec1.push_back(rand() % 100 + 1);
}
cout << vec1.size() << endl;
// 用通用的迭代器遍历方式,遍历vec1,并打印容器中所有的元素值
Vector<int>::iterator it1 = vec1.begin();
for (; it1 != vec1.end(); ++it1)
{
cout << *it1 << " ";
}
cout << endl;
Vector<int> vec2(10, 20);
int arr[] = { 12,4,56,7,89 };
Vector<int> vec3(arr, arr + sizeof(arr) / sizeof(arr[0]));
#endif
return 0;
}