[C++] Hash (simulating the implementation of unordered series containers)

1. Transformation of hash table

1. Modification of template parameter list

  • K: The type of the key.
  • V: Different containers V have different types. If it is unordered_map, V represents a key-value pair; if it is unordered_set, V is K.
  • KeyOfValue: Because the type of V differs between containers, the way to get the key out of a value differs too. KeyOfValue is the functor type that extracts the key from an object of type V.
  • HF: Hash function functor type. The hash function uses the division (remainder) method, so a key type that cannot be used with the modulo operator directly must first be converted into a size_t (an unsigned integer) that can.
// Forward declaration of the hash table class template.
//   K          - key type
//   V          - stored value type (the key itself for a set, a key/value pair for a map)
//   KeyOfValue - functor type that extracts the key from a V
//   HF         - hash functor type; defaults to DefHashF<K>, because it is the key that is hashed
template<class K, class V, class KeyOfValue, class HF = DefHashF<K>>
class HashBucket;

2. Add iterator operation

// To keep the implementation simple, the hash-bucket iterator class needs to refer to
// HashBucket itself, so the class template is declared here before the iterator.
template<class K, class V, class KeyOfValue, class HF>
class HashBucket;

// 注意:因为哈希桶在底层是单链表结构,所以哈希桶的迭代器不需要--操作
template <class K, class V, class KeyOfValue, class HF>
struct HBIterator
{
    // 类型重命名
    typedef HashBucket<K, V, KeyOfValue, HF> HashBucket; // 哈希表
    typedef HashBucketNode<V> Node;                      // 哈希表节点
    typedef HBIterator<K, V, KeyOfValue, HF> Self;       // 迭代器

    // 构造函数(迭代器由节点指针和哈希表指针来构造)
    HBIterator(Node* node = nullptr, HashBucket* pHt = nullptr);

    // 相关运算符重载
    // 1、让迭代器可以移动,前置++运算符的重载(单向迭代器,不支持--操作)
    Self& operator++() // 前置++,返回指向下一个节点的迭代器的引用
    {
        // 遍历当前桶的节点
        // 1.当前节点的下一个节点不为空
        if (_node->_next)
        {
            _node = _node->_next; // _node指向下一个节点
        }
        // 2.当前节点的下一个节点为空,说明走到当前哈希桶的桶底了
        else
        {
            // 先通过哈希函数计算出当前桶的位置(先取出key再进行取模)
            size_t index = Hash()(KeyOfT()(_node->_data)) % _ht->_tables.size();

            // 从下一个位置开始,遍历哈希表,找到下一个不为空的哈希桶
            index++;
            while (index < _ht->_tables.size())
            {
                if (_ht->_tables[index]) // 找到下一个不为空的桶了
                {
                    _node = _ht->_tables[index]; // _node指向该桶的第一个节点
                    return *this;
                }
                index++;
            }
            // 后面没有不为空的桶了
            _node = nullptr; // _node指向空
        }
        return *this; // 返回下一个节点的迭代器的引用
    }

    Self operator++(int);

    // 2、让迭代器具有类似指针的行为
    V& operator*() // *解引用操作符
    {
        return _node->_data; // 返回迭代器指向节点中数据的引用
    }

    V* operator->() // ->成员访问(指针)操作符
    {
        return &_node->_data; // 返回迭代器指向节点中数据的地址
    }

    // 让迭代器可以比较
    bool operator==(const Iter& it) const
    {
        return _node == it._node; // 比较两个迭代器中的节点指针,看是否指向同一节点
    }

    bool operator!=(const Iter& it) const
    {
        return _node != it._node; // 比较两个迭代器中的节点指针,看是否指向同一节点
    }

    Node _node;     // 当前迭代器关联的节点
    HashBucket* _pHt; // 哈希桶--主要是为了找下一个空桶时候方便
};

Note: The hash table iterator encapsulates a node pointer and a hash table pointer. The hash table pointer is needed because the iterator has to find the next bucket in the hash table.

 [Idea] Overloading of the prefix ++ operator (forward iterator, does not support the -- operation)

  • If the nodes of the current bucket have not been traversed completely, let the node pointer _node point to the next node.
  • If the nodes of the current bucket have been traversed, let the node pointer _node point to the first node of the next bucket that is not empty.


3. Define the structure of the hash table

An array that stores the addresses of the head nodes of each linked list.

Note: There is a private member access problem. The hash table iterator holds a pointer to the hash table, and when one bucket has been fully traversed it uses that pointer to find the next bucket in the hash table. However, _tables is a private member of the hash table and cannot be accessed from the iterator, so the iterator class template needs to be declared as a friend of the hash table.

K: The type of key value key.
T: The type of data, if it is unordered_set, it is key, if it is unordered_map, it is pair<const key, V>.
Hash: Functor class that converts types that cannot be modulo into size_t types that can be modulo.
KeyOfT: Functor class, which obtains the key value through the type of T.

// Hash table structure: an array of buckets, each bucket being the head pointer of a
// singly linked list of nodes.
//   K      - key type
//   T      - stored data type
//   Hash   - functor type that converts a key into a size_t usable with the modulo operator
//   KeyOfT - functor type that extracts the key from a T
template<class K, class T, class Hash, class KeyOfT>
class HashTable
{
    typedef HashNode<T> Node; // hash table node

    // The iterator class template is declared a friend because the hash table pointer
    // held by an iterator has to read the private member _tables.
    // Option 1: befriend every specialization of the iterator template. The parameter
    // names must differ from those of the enclosing template; redeclaring K, T, Hash or
    // KeyOfT here would shadow the enclosing template parameters, which is ill-formed.
    template<class K2, class T2, class Hash2, class KeyOfT2>
    friend struct HTIterator;
    // Option 2 (alternative to option 1): befriend only the matching specialization.
    //     friend struct HTIterator<K, T, Hash, KeyOfT>;

public:
    // Iterators
    typedef HTIterator<K, T, Hash, KeyOfT> iterator; // nested iterator type
    iterator begin(); // iterator to the first node
    iterator end();   // iterator holding a null node pointer

    // Construction, copy construction, assignment, destruction
    HashTable() = default; // requested explicitly: declaring the copy constructor
                           // suppresses the implicitly generated default constructor
    HashTable(const HashTable& ht);
    HashTable& operator=(HashTable ht);
    ~HashTable();

    Node* Find(const K& key);                   // look up a node by key
    pair<iterator, bool> Insert(const T& data); // insert a node
    bool Erase(const K& key);                   // remove a node by key

private:
    vector<Node*> _tables;  // buckets: head-node address of each linked list
    size_t _n = 0;          // number of valid nodes in the table (0 by default)
};

① Implementation of begin() and end()
// Returns an iterator to the first node of the table: the head of the first non-empty
// bucket. When every bucket is empty, the result is end().
iterator begin()
{
    // An iterator is built from a node pointer plus a hash table pointer; inside a member
    // function, `this` is the pointer to the hash table.
    for (auto head : _tables)
    {
        if (!head)
        {
            continue; // empty bucket, keep scanning
        }
        return iterator(head, this);
    }
    return end();
}

// Returns the past-the-end iterator: a null node pointer paired with this table.
iterator end()
{
    // `this` points at the hash table object the member function was called on.
    iterator pastTheEnd(nullptr, this);
    return pastTheEnd;
}

Note: this pointer points to the hash table object currently calling the begin() member function.


② Implementation of default member function
a. Implementation of constructor
HashTable() = default; // Generated by the compiler on request: because a copy constructor is implemented, the default constructor would otherwise not be generated

b. Implementation of copy constructor (deep copy)

It must be a deep copy. A shallow copy will cause the addresses of the same batch of hash buckets to be stored in the two hash tables.

  • Size the new hash table to ht._tables.size() .
  • Traverse ht._tables and copy the nodes in each bucket one by one to the corresponding position in the new hash table.
  • Change the number of valid nodes in the new hash table to ht._n.
// 拷贝构造
HashTable(const HashTable& ht)
{
    // 深拷贝,用已存在的对象ht去拷贝构造一个新对象
    // 将新哈希表大小调整为ht._tables.size()
    _tables.resize(ht._tables.size());

    // 遍历ht._tables的所有哈希桶,将桶里的节点一一拷贝到新哈希表对应位置上
    for (size_t i = 0; i < ht._tables.size(); i++)
    {
        Node* cur = ht._tables[i]; // 记录当前桶的位置
        while (cur) // 当前桶不为空,开始拷贝桶中的节点
        {
            Node* copy = new Node(cur->_data); // 申请一个新节点

            copy->_next = _tables[i]; // 将新节点插入到新哈希表中
            _tables[i] = copy;

            cur = cur->_next; // 继续遍历下一个节点
        }
    }
    // 更改新哈希表中的有效节点个数
    _n = ht._n;
}

c. Implementation of assignment operator overloaded function (modern writing)

The parameter is taken by value, so the copy constructor is called when the argument is passed. The function then takes over the bucket array and the node count of that copy, leaving the copy holding the current table's old buckets, so the current hash table ends up with the content you want. When the function returns, the by-value parameter ht goes out of scope and its destructor automatically releases the old nodes.

// Copy assignment (copy-and-swap).
// The argument is passed by value, so the copy constructor has already produced the
// table `ht` by the time the body runs. Both member variables are exchanged with `ht`:
// this table takes over the copied contents, and `ht` leaves with this table's previous
// buckets and count, which its destructor releases when the parameter goes out of scope.
// Returns a reference to this table.
HashTable& operator=(HashTable ht)
{
    _tables.swap(ht._tables);

    // Exchange the node counts as well, so that `ht` stays consistent with the buckets
    // it now holds instead of keeping a count that describes the other table.
    const size_t previousCount = _n;
    _n = ht._n;
    ht._n = previousCount;

    return *this;
}

d. Implementation of destructor 
// Destructor: frees every node of every bucket, leaves each bucket null and resets the
// node count to zero.
~HashTable()
{
    for (auto& head : _tables)
    {
        // Pop nodes off the front of the chain until the bucket is empty.
        while (head)
        {
            auto doomed = head;
            head = head->_next; // advance first: the node is about to be released
            delete doomed;
        }
        // head is nullptr here, i.e. the bucket has been cleared.
    }

    _n = 0; // no valid nodes remain
}

4. Implement the relevant interfaces of the hash table

① Find nodes

In the find interface, you need to obtain the key from elements of different types, because elements are compared by their key, and the hash position is calculated by taking the element's key modulo the table size.

So you need to use two functor classes:

  • Functor class KeyOfT, obtains the key value of different types of data (if data is a key, take key, if it is a pair, take first)
  • Functor class Hash, the type of some element key codes cannot be directly modulo, and needs to be converted into a size_t type that can be modulo.
// Looks up the node whose key equals `key`.
// Returns the address of the node, or nullptr when the table has no buckets or the key
// is not present.
Node* Find(const K& key)
{
    // A table without buckets cannot contain anything (and must not be used as a divisor).
    if (_tables.empty())
    {
        return nullptr;
    }

    // Hash the key to find the bucket the element would be stored in.
    const size_t bucket = Hash()(key) % _tables.size();

    // Walk that bucket's chain, comparing the key extracted from each node.
    for (Node* node = _tables[bucket]; node != nullptr; node = node->_next)
    {
        if (key == KeyOfT()(node->_data))
        {
            return node;
        }
    }

    // Reached the end of the chain without a match.
    return nullptr;
}

②Insert node

In the insert interface, you need to obtain the key from elements of different types, because elements are compared by their key, and the hash position is calculated by taking the element's key modulo the table size.

So you need to use two functor classes:

  • Functor class KeyOfT, obtains the key value of different types of data (if data is a key, take key, if it is a pair, take first)
  • Functor class Hash, the type of some element key codes cannot be directly modulo, and needs to be converted into a size_t type that can be modulo.

The return value of the function is a pair<iterator, bool> type object:

  • The purpose is to simulate the operator[] operator overloaded function in unordered_set and unordered_map.

Insert element interface function: before inserting an element, it will check whether the element is already in the hash table through the key code of the element (redundancy is not allowed):

  • If it is, return: pair<an iterator pointing to the element, false>.
  • If not, insert the node first, and then return: pair<an iterator pointing to the element, true>.

T: The type of data. If it is unordered_set, it is key. If it is unordered_map, it is pair<const key, V>.

// Inserts `data` into the table; duplicate keys are not allowed.
// Returns pair<iterator to the node with that key, bool>:
//   - false when a node with the same key already exists (nothing is inserted),
//   - true  when a new node was created and linked in.
// The duplicate check runs before the growth check, so inserting a key that is already
// present never triggers a rehash.
pair<iterator, bool> Insert(const T& data)
{
    // 1. Reject duplicates first. Find() also copes with a table that has no buckets yet.
    if (Node* existing = Find(KeyOfT()(data)))
    {
        return make_pair(iterator(existing, this), false);
    }

    // 2. Grow when the table is empty or the load factor has reached 1.
    if (_n == _tables.size())
    {
        // New capacity: 10 buckets for an empty table, otherwise double.
        size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;

        // Build the new bucket array as a local variable (all buckets start as nullptr).
        vector<Node*> newTables;
        newTables.resize(newSize);

        // Move every existing node into the new array. The nodes themselves are relinked,
        // not copied, so no allocation happens per node.
        for (size_t i = 0; i < _tables.size(); i++)
        {
            Node* cur = _tables[i];
            while (cur != nullptr)
            {
                // Remember the rest of the old chain before relinking cur.
                Node* next = cur->_next;

                // Recompute the node's bucket for the new capacity.
                size_t index = Hash()(KeyOfT()(cur->_data)) % newSize;

                // Head-insert into the new bucket.
                cur->_next = newTables[index];
                newTables[index] = cur;

                cur = next;
            }
            // Every node of this bucket has been moved out.
            _tables[i] = nullptr;
        }
        // Adopt the new bucket array; the old (now empty) array is released with newTables.
        _tables.swap(newTables);
    }

    // 3. Compute the bucket for the new element using the (possibly grown) table size.
    size_t index = Hash()(KeyOfT()(data)) % _tables.size();

    // 4. Head-insert a new node into that bucket.
    Node* newNode = new Node(data);
    newNode->_next = _tables[index];
    _tables[index] = newNode;
    _n++; // one more valid node

    // Insertion succeeded: return pair<iterator to the new node, true>.
    return make_pair(iterator(newNode, this), true);
}

③ Delete node

In the erase interface, you need to obtain the key from elements of different types, because elements are compared by their key, and the hash position is calculated by taking the element's key modulo the table size.

So you need to use two functor classes:

  • Functor class KeyOfT, obtains the key value in different types of data (if data is a key, take key, if it is a pair, take first).
  • Functor class Hash, the type of some element key codes cannot be directly modulo, and needs to be converted into a size_t type that can be modulo.
bool Erase(const K& key)
{
    // 1、先判断哈希表是否为空
    if (_tables.size() == 0)
    {
        return false; // 表为空,删除失败
    }

    // 2、通过哈希函数计算出待删除节点所映射哈希桶的位置
    size_t index = Hash()(key) % _tables.size();

    // 3、遍历该哈希桶,查找待删除节点,以及它的前驱节点
    Node* cur = _tables[index];
    Node* prev = nullptr;
    while (cur)
    {
        // 找到该节点了
        if (key == KeyOfT()(cur->_data)) // 取key出来比较
        {
            if (cur == _tables[index]) // cur是头节点,进行头删
            {
                _tables[index] = cur->_next;
            }
            else // cur不是头节点
            {
                prev->_next = cur->_next;
            }

            delete cur;    // 删除节点
            cur = nullptr;
            _n--;          // 有效节点个数-1

            return true;   // 删除成功,返回true
        }
        // 继续往后遍历
        prev = cur;
        cur = cur->_next;
    }

    // 没有找到该节点,返回false
    return false;
}

5. Solving the modulo problem of the division (remainder) method

The hash table calculates the hash position of an element's key with the division (remainder) method, so it needs functors that convert key types that cannot be used with the modulo operator directly into the size_t type, which can.

Here are the functors for common type modulo and string type modulo, which are placed in the global domain to facilitate use when simulating unordered_set and unordered_map.

// Hash functors: when the hash function uses the division (remainder) method, they turn
// a key into a size_t that can be used with the modulo operator.

// Default functor, for size_t and for key types that can be converted to size_t.
template<class K>
struct HashFunc
{
    // Returns the key converted to size_t.
    // const-qualified so that it can also be called through a const functor object.
    size_t operator()(const K& key) const
    {
        return static_cast<size_t>(key);
    }
};

// Specialization for string keys.
template<>
struct HashFunc<std::string>
{
    // Folds the characters of the string into a size_t: for each character the running
    // value is multiplied by 131 and the character is added. An empty string yields 0.
    size_t operator()(const std::string& key) const
    {
        size_t hash_key = 0;
        for (char ch : key)
        {
            hash_key = hash_key * 131 + ch;
        }
        return hash_key;
    }
};

2. Simulation implementation of unordered_set

When implementing unordered_set, you only need to re-encapsulate the interface in hashbucket.

Define the structure of unordered_set:

  • K: The type of key value key.
  • Hash: Functor class that converts types that cannot be modulo into size_t types that can be modulo.
namespace winter
{
	template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
		// 仿函数类,获取key对象中的key
		struct KeyOfSet
		{
			const K& operator()(const K& key) const
			{
				return key;
			}
		};
        
	public:
		// 这里是要取HashTable<...>类模板里面定义的内嵌类型iterator,要注意:
		// 编译到这里的时候,类模板HashTable<K, K, Hash, KeyOfSet>可能还没有实例化成具体的类
		// 那么编译器就不认识这个类模板,更别说去它里面找iterator了
		// 所以要加typename,告诉编译器这是个类型,等它实例化了再去它里面找iterator
		typedef typename hash_bucket::HashTable<K, K, Hash, KeyOfSet>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}

		// 插入元素
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}
	private:
        // 封装一张哈希表,因为要实现unordered_set,所以要传
        // 键值K、键值K、针对除留余数法取模问题的Hash仿函数,提取Key值的KeyOfSet仿函数
		hash_bucket::HashTable<K, K, Hash, KeyOfSet> _ht;
	};
}

3. Simulation implementation of unordered_map

When implementing unordered_map, you only need to re-encapsulate the interface in hashbucket.

Define the structure of unordered_map:

  • K: The type of key value key.
  • V: The mapped value type. The data stored in unordered_map is of type pair<const key, V>.
  • Hash: Functor class that converts types that cannot be modulo into size_t types that can be modulo.

What is stored in unordered_map is the key-value pair of pair<K, V>, K is the key type, V is the value type, and HF is the hash function type.

// unordered_map built as a thin wrapper around HashBucket.
//   K  - key type
//   V  - mapped value type
//   HF - hash functor type that converts a key into a size_t
template<class K, class V, class HF = DefHashF<K>>
class unordered_map
{
    typedef pair<K, V> ValueType;

    // Key extractor: returns the key (first member) of a stored key/value pair.
    // It is declared before HT because HT names it as a template argument.
    struct KeyOfValue
    {
        const K& operator()(const ValueType& data) const
        {
            return data.first;
        }
    };

    typedef HashBucket<K, ValueType, KeyOfValue, HF> HT;
public:
    // HT::Iterator is a dependent name, so `typename` is needed to mark it as a type.
    typedef typename HT::Iterator iterator;
public:
    unordered_map(): _ht()
    {}

    iterator begin(){ return _ht.Begin();}
    iterator end(){ return _ht.End();}

    // capacity
    size_t size()const
    {
        return _ht.Size();
    }
    bool empty()const
    {
        return _ht.Empty();
    }

    // Access
    // Passes pair(key, V()) to the table's InsertUnique and returns a reference to the
    // mapped value of the element that the returned iterator points at.
    V& operator[](const K& key)
    {
        return (*(_ht.InsertUnique(ValueType(key, V())).first)).second;
    }
    // Declared only; no definition is visible in this section.
    const V& operator[](const K& key)const;

    // lookup
    iterator find(const K& key)
    {
        return _ht.Find(key);
    }
    size_t count(const K& key)
    {
        return _ht.Count(key);
    }

    // modify
    pair<iterator, bool> insert(const ValueType& value)
    {
        return _ht.Insert(value);
    }
    iterator erase(iterator position)
    {
        return _ht.Erase(position);
    }

    // bucket
    size_t bucket_count()
    {
        return _ht.BucketCount();
    }
    size_t bucket_size(const K& key)
    {
        return _ht.BucketSize(key);
    }
private:
    HT _ht; // the wrapped hash table
};

Guess you like

Origin blog.csdn.net/weixin_74531333/article/details/134191331