[Data structure] Hash bucket

  In the last blog, we briefly introduced the hash table and one way to resolve hash collisions; today we introduce another method to resolve hash collisions: open hashing. Open hashing is also called the chain address method (separate chaining). First, the hash function is used to calculate a hash address for every key in the key set. Keys with the same address belong to the same subset, and each subset is called a bucket. The elements in each bucket are linked together in a singly linked list, and the head pointers of these lists are stored in a vector. It may sound complicated, but it is actually very simple; one picture makes it clear. Suppose the hash function is Hash(key) = key % 10:
write picture description here

  • If the hash function becomes known to an attacker, the hash bucket can be attacked by feeding it keys that all land in the same bucket, which degenerates the hash bucket into one singly linked list. Search efficiency is then greatly reduced. We can replace the singly linked list in each bucket with a red-black tree to improve the search efficiency.
  • When the number of hash buckets equals the number of inserted nodes, we increase the capacity.

Here is my implementation of the hash bucket:

// Prime table used to pick a suitable bucket count (see GetNextPrime).
// Each entry is roughly double the previous one; the last entry is just
// below 2^32.
// NOTE(review): identifiers starting with an underscore followed by a capital
// letter are reserved for the implementation in C++ — consider renaming.
const int _PrimeSize = 28;// number of entries in the prime table
static const unsigned long _PrimeList[_PrimeSize] =
{
    53ul, 97ul, 193ul, 389ul, 769ul,
    1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
    49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
    1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
    50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
    1610612741ul, 3221225473ul, 4294967291ul
};
// Returns the first entry of _PrimeList that is strictly greater than num.
// When num is at least as large as every entry, the last (largest) entry is
// returned instead.
size_t GetNextPrime(size_t num)
{
    size_t idx = 0;
    // Skip every entry that is not strictly greater than the request.
    while (idx < _PrimeSize && !(_PrimeList[idx] > num))
        ++idx;

    // Ran off the end of the table: fall back to the largest prime.
    if (idx == _PrimeSize)
        idx = _PrimeSize - 1;

    return _PrimeList[idx];
}

// Default key-to-integer functor: converts the key itself to size_t.
// Usable for any key type K that is convertible to size_t (integers, chars,
// enums, ...). Negative values wrap around as in any signed-to-unsigned
// conversion.
template <class K>
class KeyToIntDef
{
public:
    // Returns key converted to size_t.
    // const-qualified: the functor is stateless, so it must also be callable
    // through const objects and from const member functions.
    size_t operator()(const K& key) const
    {
        return static_cast<size_t>(key);
    }
};
// Converts a NUL-terminated string into an integer using the BKDR hash:
// the accumulator is multiplied by a fixed seed and the next character is
// added, for every character up to (not including) the terminating NUL.
// The result is masked to the low 31 bits before being returned.
static size_t BKDRHash(const char * str)
{
    const unsigned int multiplier = 131; // 31 131 1313 13131 131313
    unsigned int acc = 0;

    for (const char* cursor = str; *cursor; ++cursor)
        acc = acc * multiplier + (*cursor);

    return static_cast<size_t>(acc & 0x7FFFFFFF);
}
// Key-to-integer functor for std::string keys: hashes the string's
// characters with BKDRHash.
// Hashing goes through c_str(), so only the characters before the first
// embedded NUL (if any) contribute to the result.
class StringToInt
{
public:
    // Returns the BKDR hash of key.
    // const-qualified because the functor is stateless; std::string is spelled
    // out so the class does not depend on a using-directive being in effect.
    size_t operator()(const std::string& key) const
    {
        return BKDRHash(key.c_str());
    }
};


#include <iostream>
#include <vector>
#include <string>
using namespace std;
// One node of a bucket's singly linked chain.
template <class K, class V>
struct HashNode
{
    std::pair<K, V> _kv;          // the stored key/value pair
    HashNode* _pNext = nullptr;   // next node in the same bucket; null at the tail

    // Builds an unlinked node holding a copy of kv.
    HashNode(const std::pair<K, V>& kv)
        : _kv(kv)
    {}
};

template <class K, class V, class KeyToInt>
class HashTable; // forward declaration: the iterator needs to walk the table's buckets

// Forward iterator over every node stored in a HashTable.
// Iteration visits the nodes of one bucket chain from head to tail, then moves
// on to the next non-empty bucket. A null current node represents End().
template <class K, class V, class KeyToInt = KeyToIntDef<K>>
struct HashTableIterator
{
    typedef HashNode<K, V> Node;
    typedef Node* PNode;
    typedef HashTableIterator<K, V, KeyToInt> Self;

    PNode _pCur;                     // node currently referred to; null means End()
    HashTable<K, V, KeyToInt>* _ht;  // table being iterated (not owned)
public:
    // Creates a detached End() iterator.
    HashTableIterator()
        : _pCur(nullptr)
        , _ht(nullptr)
    {}

    // Creates an iterator referring to pCur inside table ht.
    HashTableIterator(const PNode pCur, HashTable<K, V, KeyToInt>* ht)
        : _pCur(pCur)
        , _ht(ht)
    {}

    // Copying takes a const reference so that temporaries and const iterators
    // can be copied; this is required to store the iterator in std::pair.
    HashTableIterator(const Self& s)
        : _pCur(s._pCur)
        , _ht(s._ht)
    {}

    Self& operator=(const Self& s)
    {
        _pCur = s._pCur;
        _ht = s._ht;
        return *this;
    }

    // Pre-increment: advances to the next node and returns this iterator.
    Self& operator++()
    {
        Next();
        return *this;
    }

    // Post-increment: advances and returns a copy of the previous position.
    Self operator++(int)
    {
        Self previous(*this);
        Next();
        return previous;
    }

    // Returns the key/value pair of the current node.
    // Must not be called on End().
    std::pair<K, V>& operator*() const
    {
        return _pCur->_kv;
    }

    std::pair<K, V>* operator->() const
    {
        return &(operator*());
    }

    // Two iterators are equal when they refer to the same node.
    bool operator==(const Self& s) const
    {
        return _pCur == s._pCur;
    }

    bool operator!=(const Self& s) const
    {
        return _pCur != s._pCur;
    }
private:
    // Moves to the following node, or to End() when there is none.
    void Next()
    {
        // Incrementing End() is a no-op instead of a null dereference.
        if (_pCur == nullptr)
            return;

        if (_pCur->_pNext)
        {
            _pCur = _pCur->_pNext;
            return;
        }

        // End of this chain: look for the next non-empty bucket.
        // size() is the number of valid buckets; capacity() may exceed it and
        // indexing beyond size() would be undefined behavior.
        const size_t bucketCount = _ht->_hashTable.size();
        for (size_t bucketNo = _ht->HashFunc(_pCur->_kv.first) + 1; bucketNo < bucketCount; bucketNo++)
        {
            if (_ht->_hashTable[bucketNo])
            {
                _pCur = _ht->_hashTable[bucketNo];
                return;
            }
        }
        _pCur = nullptr; // no further node: become End()
    }
};
//哈希桶
template <class K, class V, class KeyToInt = KeyToIntDef<K>>
class HashTable
{
    typedef HashNode<K, V> Node;
    typedef Node* PNode;
    friend HashTableIterator<K, V, KeyToInt>;
public:
    typedef HashTableIterator<K, V, KeyToInt> Iterator;
public:
    HashTable(size_t capacity = 10)
    {
        capacity = GetNextPrimer(capacity);
        _hashTable.resize(capacity);
        _size = 0;
    }

    Iterator Begin()
    {
        for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
        {
            if (_hashTable[bucketNo])
                return Iterator(_hashTable[bucketNo], this);
        }
        return Iterator(NULL, this);
    }

    Iterator End()
    {
        return Iterator(NULL, this);
    }
//////////////////////////插入,查找及删除/////////////////////////////////
    pair<Iterator, bool> InsertEqual(const pair<K, V>& kv)//允许重复
    {
        CheckCapacity();
        size_t bucketNo = HashFunc(kv.first);
        PNode pNewNode = new Node(kv);
        pNewNode->_pNext = _hashTable[bucketNo];//头插
        _hashTable[bucketNo] = pNewNode;
        _size++;
        return make_pair(Iterator(pNewNode, this), true);
    }

    pair<Iterator, bool> InsertUnique(const pair<K, V>& kv)//key值唯一
    {
        CheckCapacity();
        size_t bucketNo = HashFunc(kv.first);
        PNode pCur = _hashTable[bucketNo];
        while (pCur)
        {
            if (kv.first == pCur->_kv.first)
                return make_pair(Iterator(pCur, this), false);
            pCur = pCur->_pNext;
        }
        pCur = new Node(kv);
        pCur->_pNext = _hashTable[bucketNo];
        _hashTable[bucketNo] = pCur;
        _size++;
        return make_pair(Iterator(pCur, this), true);
    }

    Iterator Find(const K& key)
    {
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];

        while (pCur)
        {
            if (pCur->_kv.first == key)
                return Iterator(pCur, this);
            pCur = pCur->_pNext;
        }
        return Iterator(NULL, this);
    }

    Iterator Erase(Iterator pos)//key值唯一
    {
        if (pos._pCur == NULL)
            return Iterator(NULL, this);
        size_t key = pos._pCur->_kv.first;
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];
        PNode pPre = NULL;
        while (pCur)
        {
            if (pCur->_kv.first == key)
            {
                if (_hashTable[bucketNo] == pCur)//pCur是首元素结点
                    _hashTable[bucketNo] = pCur->_pNext;
                else
                    pPre->_pNext = pCur->_pNext;
                delete pCur;
                pCur = NULL;
                _size--;
                return Iterator(pPre, this);
            }
            else
            {
                pPre = pCur;
                pCur = pCur->_pNext;
            }
        }
        return Iterator(NULL, this);
    }

    size_t Erase(const K& key)//key值可以重复
    {
        size_t oldsize = _size;
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];
        PNode pPre = NULL;
        while (pCur)
        {
            if (pCur->_kv.first == key)
            {
                if (pCur == _hashTable[bucketNo])
                {
                    _hashTable[bucketNo] = pCur->_pNext;
                    delete pCur;
                    pCur = _hashTable[bucketNo];
                }
                else
                {
                    pPre->_pNext = pCur->_pNext;
                    delete pCur;
                    pCur = pPre->_pNext;
                }
                _size--;
            }
            else
            {
                pPre = pCur;
                pCur = pPre->_pNext;
            }
        }
        if (oldsize == _size)
            return 0;
        else
            return oldsize - _size;
    }
///////////////////////////其它常用函数///////////////////////////////
    size_t Count(const K key)//值为key的元素个数
    {
        size_t count = 0;
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];
        while (pCur)
        {
            if (pCur->_kv.first == key)
                count++;
            pCur = pCur->_pNext;
        }
        return count;
    }

    size_t BucketCount()const//桶的个数
    {
        return _hashTable.capacity();
    }

    size_t BucketSize(size_t bucketNo)const//桶内元素个数
    {
        size_t count = 0;
        PNode pCur = _hashTable[bucketNo];
        while (pCur)
        {
            count++;
            pCur = pCur->_pNext;
        }
        return count;
    }

    V& FindORInsert(const K& key)//查找值为key,如果找到了,返回对应的value,
                                 //如果没有找到插入该结点,返回缺省的value
    {
        Iterator ret = InsertUnique(make_pair(key, V())).first;
        return (*ret).second;
    }

    bool Empty()const//是否为空
    {
        return _size == 0;
    }

    size_t Size()const//插入的总数
    {
        return _size;
    }

    void Clear()//清空
    {
        for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
        {
            PNode pCur = _hashTable[bucketNo];
            while (pCur)
            {
                _hashTable[bucketNo] = pCur->_pNext;
                delete pCur;
                pCur = _hashTable[bucketNo];
                _size--;
            }
        }
    }

    ~HashTable()
    {
        Clear();
    }
private:
    size_t HashFunc(const K& key)//哈希函数
    {
        return KeyToInt()(key) % _hashTable.capacity();
    }

    void CheckCapacity()//扩容
    {
        size_t capacity = _hashTable.capacity();
        if (_size == capacity)
        {
            HashTable<K, V, KeyToInt> newHt(GetNextPrime(capacity));
            for (size_t bucketNo = 0; bucketNo < capacity; bucketNo++)
            {
                PNode pCur = _hashTable[bucketNo];
                while (pCur)
                {
                    newHt.InsertEqual(pCur->_kv);
                    pCur = pCur->_pNext;
                }
            }
            _hashTable.swap(newHt._hashTable);
        }
    }
private:
    vector<PNode> _hashTable;
    size_t _size;
};

void test()
{
    HashTable<int, int> ht;
    ht.InsertEqual(make_pair(1, 1));
    ht.InsertEqual(make_pair(5, 5));
    ht.InsertEqual(make_pair(15, 15));
    ht.InsertEqual(make_pair(15, 15));
    ht.InsertEqual(make_pair(35, 35));
    ht.InsertEqual(make_pair(9, 9));
    HashTable<int, int>::Iterator it = ht.Begin();
    if (!ht.Empty())
    {
        while (it != ht.End())
        {
            cout << it->first << " " ;
            cout << (*it).second << endl;
            it++;
        }
        cout << endl;
        cout << ht.BucketSize(5) << endl;
        cout << ht.BucketCount() << endl;
        cout << ht.Count(15) << endl;
    }
    it = ht.Begin();
    cout << ht.Erase(15) << endl;
    HashTable<int, int>::Iterator ret = ht.Find(1);
    ht.Erase(ret);
    cout << ht.Size() << endl;
    ht.Clear();

    HashTable<string, string, StringToInt> ht1;
    ht1.InsertUnique(make_pair("111", "111"));
    ht1.InsertUnique(make_pair("111", "111"));
    cout << ht1.FindORInsert("111") << endl;
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325627182&siteId=291194637