【C++】unordered map/set

改造LinkHashTable

首先放上改造好的框架，因为和红黑树的封装相近，就不展开详细介绍了

template<class K, class T, class KeyOfT, class HashFunc>
    class HashTable;
    template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
    struct HashIterator;
// HashTable names HashIterator as its iterator type, and HashIterator stores a
// HashTable pointer. Both templates are therefore declared up front so that
// whichever one is defined first can already refer to the other.
    // One node of a bucket's singly linked chain.
    template<class T>
    struct HashNode
    {
        T _data;             // the stored element
        HashNode<T>* _next;  // following node in the same chain, nullptr at the tail

        // Copies `data` into the node and leaves it unlinked.
        HashNode(const T& data)
            : _data(data), _next(nullptr)
        {}
    };
     template<class K, class T, class KeyOfT, class HashFunc>
    class HashTable
    {
    
    
    public:
        template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
        friend struct HashIterator;
        //迭代器需要调用HashTable的私有成员_tables,所以要将iterator定义为HashTable的友元
        typedef HashNode<T> Node;
        typedef HashTable<K, T, KeyOfT, HashFunc> Self;
        
    public:
        typedef HashIterator<K, T, T&, T*, KeyOfT, HashFunc> iterator;
        
        HashTable()
        {
    
    }
        pair<iterator, bool> Insert(const T& data);
        iterator Find(const K& key);
        bool Erase(const K& key);
        bool Erase(iterator it);
        HashTable(const HashTable& ht);
        Self& operator=(Self ht);
        ~HashTable();
        iterator begin();
        iterator end();
  private:
        vector<Node*> _tables;
        size_t _n = 0;
    };

略过迭代器部分，我们发现哈希表的改变也就只有两部分：首先是结点的模板参数由原来的两个（K，V）变成了现在的一个T；其次是哈希表增加了一个模板参数KeyOfT。这两个模板参数的作用和红黑树中的完全一样，是为了让我们的哈希表不仅可以储存单类型
数据还可以储存pair
迭代器需要调用HashTable的私有成员_tables,所以要将iterator定义为HashTable的友元

迭代器的添加

 template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
    struct HashIterator
    {
        typedef HashNode<T> Node;
        typedef HashIterator<K, T, Ref, Ptr, KeyOfT, HashFunc> Self;

        // Binds the iterator to one node and to the table that owns it.
        HashIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
            :_node(node)
            ,_ht(pht)
        {
        }

        Self& operator++();                // moves to the next element
        bool operator!=(const Self& ite);  // compares the node pointers

        Ref operator*()  { return _node->_data; }
        Ptr operator->() { return &_node->_data; }

        Node* _node;                             // current node
        HashTable<K, T, KeyOfT, HashFunc>* _ht;  // table being iterated
    };

迭代器成员构建思路
迭代器的构建是封装的重中之重，最开始我们想到的一定是封装一个指针作为迭代器，这样就可以完成operator */->的构建。

但是如何构建operator++呢？ 结点在哈希桶中间还好，可以通过_next指针向下访问，但是若这个桶走完了呢？
这时我们想到存储一下这个哈希桶的头结点，但是我们的头节点并非是不变的，不管是插入或者是删除都可能引起头节点的改变。那么使用头节点的二级指针呢？ 我们发现也不行 ，若使用二级指针最后一个桶走完了该怎么判断呢？ 我们还得知道最后一个桶的首地址，不然很可能产生越界问题。但是前面的问题又来了，最后一个桶的首地址是可能变化的

经过上述思考我们发现存储哈希桶的首地址方法好像行不通，我们需要存储一个不会变化的东西作为基准，于是我们想到了哈希表的地址，不管是插入还是删除还是交换，都是对哈希表的成员_tables进行操作，而哈希表是不会变的，我们可以通过哈希表的指针来找到_tables进而找到需要遍历的下一个位置

在实际编写代码的过程中我们会发现，HashTable包含迭代器，迭代器又包含了HashTable，两者总是一前一后，所以我们必须要将两个结构体的声明写在前面，让编译器知道存在这样的类型

迭代器接口实现

因为迭代器的模板参数非常的多，所以模拟实现并未实现完全的声明定义分离
为方便观察我将迭代器实现中的几个重定义类型放在前面

// Shorthand used by the iterator members shown below.
using Node = HashNode<T>;                                    // chain node type
using Self = HashIterator<K, T, Ref, Ptr, KeyOfT, HashFunc>; // the iterator's own type

构造函数

 // Stores the node the iterator points at and the table it belongs to.
 HashIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
            : _node(node), _ht(pht)
        {}

operator++

这里存在几种情况：
1._node在哈希桶中间直接通过_next指针找到下一个结点的地址赋给_node就可以了
2._node在哈希桶的结尾：此时我们要找下一个不为空的哈希桶。可以通过迭代器当前的_node直接算出其所在桶的下标，然后对桶的下标进行++操作，当前桶之后第一个不为空的桶的头节点就是我们想要找的结点；若后面的桶都为空，导致index超出了下标的范围，说明整张表已经走完，将_node置为空

 // Pre-increment: moves to the next element in bucket order and returns *this.
 Self& operator++()
        {
            // Incrementing end() (a null node) is a no-op instead of a null
            // pointer dereference.
            if (_node == nullptr)
                return *this;

            // Case 1: another node follows in the current bucket's chain.
            if (_node->_next)
            {
                _node = _node->_next;
                return *this;
            }

            // Case 2: the chain is exhausted. Recompute the current bucket index
            // from the element's key, then scan forward for the next non-empty
            // bucket.
            KeyOfT kot;
            HashFunc hs;
            const size_t bucketCount = _ht->_tables.size();
            size_t index = hs(kot(_node->_data)) % bucketCount;

            _node = nullptr;  // stays null (== end()) if no later bucket is occupied
            while (++index < bucketCount)
            {
                if (_ht->_tables[index])
                {
                    _node = _ht->_tables[index];
                    break;
                }
            }
            return *this;
        }

operator--

此迭代器不支持operator--，这是由其底层结构决定的：因为vector每个位置下面挂的是单链表，所以向前遍历操作十分困难；又因为LinkHash并没有排序功能，构建反向迭代器和operator--并没有什么意义，所以unordered_map/set也都不支持反向迭代器

operator!=

// Two iterators differ exactly when they point at different nodes.
bool operator!=(const Self& ite)
        {
            return !(_node == ite._node);
        }

operator*/->

// Dereference: yields the element stored in the current node.
Ref operator*()
{
    return (*_node)._data;
}

// Member access: yields the address of the element stored in the current node.
Ptr operator->()
{
    return &(_node->_data);
}

哈希表接口实现

构造函数

// Default constructor. The body is intentionally empty: _tables is
// default-constructed by vector and _n takes its in-class initializer.
HashTable() {}

为何要写一个空的构造函数呢？因为我们显式写了拷贝构造函数。拷贝构造函数也是构造函数，而只要类中声明了任何一个构造函数，编译器就不会再自动生成默认构造函数；此时像 HashTable<...> ht; 这样不带参数地定义对象，就会因为找不到匹配的构造函数而报错。所以我们还是要自己写一个无参的构造函数。函数体可以为空：_tables是库里的vector，会调用自己的默认构造函数完成初始化，_n则使用声明处给的缺省值0

拷贝构造函数

 HashTable(const HashTable& ht)
        {
    
    
            _tables.resize(ht._tables.size());
            for (size_t i = 0; i < _tables.size(); i++)
            {
    
    
                Node* cur = ht._tables[i];
                Node* newprev = nullptr;
                Node* newnode = nullptr;
                while (cur)
                {
    
    
                    newprev = newnode;
                    newnode = new Node(cur->_data);
                    if (newprev)
                        newprev->_next = newnode;
                    else
                        _tables[i] = newnode;
                    cur = cur->_next;
                    _n++;
                }
                
            }
        }

从头遍历每一个哈希桶，将每一个结点依次拷贝链接到新表上。过程比较简单，看看代码应该没有问题

operator=

// Copy-and-swap assignment. `ht` is taken by value, so it is already a private
// copy of the right-hand side; after the swap it holds this table's previous
// contents and releases them when it is destroyed at the end of the call.
Self& operator=(Self ht)
        {
            std::swap(_n, ht._n);
            _tables.swap(ht._tables);
            return *this;
        }

函数传参时调用拷贝构造，深拷贝出一个新的HashTable ht（形参），然后交换当前对象(*this)与形参ht的成员就可以达到赋值的效果；形参出作用域后自动析构，顺带释放了当前对象原来的数据

析构函数

// Frees every node of every bucket; each bucket head ends up null. The vector
// itself is released afterwards by its own destructor.
~HashTable()
        {
            for (Node*& head : _tables)
            {
                while (head)
                {
                    Node* doomed = head;
                    head = head->_next;  // unlink before freeing
                    delete doomed;
                }
            }
        }

依次遍历每一个桶，释放每一个结点，将每个桶的头节点置空，最后vector会自动调用自己的析构函数销毁自己

begin() / end()

 // Returns an iterator to the head of the first non-empty bucket, or an
 // iterator holding nullptr (equal to end()) when every bucket is empty.
 iterator begin()
        {
            Node* first = nullptr;
            for (size_t i = 0; i < _tables.size() && first == nullptr; ++i)
                first = _tables[i];
            return iterator(first, this);
        }

        // The past-the-end position is represented by a null node pointer.
        iterator end()
        {
            iterator sentinel(nullptr, this);
            return sentinel;
        }

begin()就是依次遍历，找到第一个非空的桶，将其头节点传给迭代器就可以了，this指针就是表的地址，非常方便

接下来三个函数都是上一节提到过的，我们对其进行了部分改造，添加了迭代器等。基本思路都是一样的，就不展开细说了。
可以依照这前面的代码对照看看

Insert

 template<class K, class T, class KeyOfT, class Hash>
    pair<typename HashTable<K, T, KeyOfT, Hash>::iterator, bool>  HashTable<K, T, KeyOfT, Hash>::Insert(const T& data)
    {
    
    
        KeyOfT kot;
        if (!_tables.empty() && Find(kot(data))._node)
            return make_pair(Find(kot(data)), false);
        Hash hs;
        if (_n >= _tables.size())
        {
    
    
            //扩容
            size_t newSize = _tables.size() == 0 ? 10 : 2 * _tables.size();
            vector<Node*> newTables;
            newTables.resize(newSize);
            for (size_t i = 0; i < _tables.size(); i++)
            {
    
    
                Node* cur = _tables[i];
                while (cur)
                {
    
    
                    Node* next = cur->_next;
                    size_t index = hs(kot(cur->_data)) % _tables.size();
                    cur->_next = newTables[index];
                    newTables[index] = cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
            _tables.swap(newTables);
        }
        size_t index = hs(kot(data)) % _tables.size();
        Node* NewNode = new Node(data);
        NewNode->_next = _tables[index];
        _tables[index] = NewNode;
        ++_n;
        return make_pair(iterator(NewNode, this), true);
    }

Erase

 // Removes the element whose key equals `key`.
 // Returns true if an element was removed, false if the key was not present.
 template<class K, class T, class KeyOfT, class Hash>
    bool HashTable<K, T, KeyOfT, Hash>::Erase(const K& key)
    {
        // An empty table has no buckets; the modulo below would divide by zero.
        if (_tables.empty())
            return false;

        Hash hs;
        KeyOfT kot;  // key extractor used by the comparison below
        const size_t index = hs(key) % _tables.size();

        Node* prev = nullptr;
        Node* cur = _tables[index];
        while (cur && kot(cur->_data) != key)
        {
            prev = cur;
            cur = cur->_next;
        }
        if (cur == nullptr)
            return false;

        // Unlink: either the bucket head or the predecessor skips over cur.
        if (prev == nullptr)
            _tables[index] = cur->_next;
        else
            prev->_next = cur->_next;

        delete cur;
        --_n;  // keep the element count (and thus the load factor) accurate
        return true;
    }
    // Removes the element `it` points to by delegating to the key overload.
    // Returns false for an iterator that holds no node (such as end()).
    template<class K, class T, class KeyOfT, class Hash>
    bool HashTable<K, T, KeyOfT, Hash>::Erase(iterator it)
    {
        if (it._node == nullptr)
            return false;
        KeyOfT kot;  // extracts the key from the stored element
        return Erase(kot((it._node)->_data));
    }

Find

// Looks up `key`; returns an iterator to the matching element, or an iterator
// holding nullptr (equal to end()) when the key is absent or the table is empty.
template<class K, class T, class KeyOfT, class Hash>
    typename HashTable<K, T, KeyOfT, Hash>::iterator HashTable<K, T, KeyOfT, Hash>::Find(const K& key)
    {
        Node* match = nullptr;
        if (!_tables.empty())
        {
            Hash hs;
            KeyOfT kot;
            // Walk the chain of the bucket the key hashes to.
            for (match = _tables[hs(key) % _tables.size()]; match; match = match->_next)
            {
                if (!(kot(match->_data) != key))
                    break;
            }
        }
        return iterator(match, this);
    }

unordered_map的封装

template<class K, class V, class HashFunc = Hash<K>>
    class my_unordered_map
    {
    
    
    public:
        struct MapKeyOfT
        {
    
    
            const K& operator()(const pair<K, V>& kv) {
    
    
                return kv.first;
            }
        };
        typedef typename LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc>::iterator iterator;
        iterator begin() {
    
     return _ht.begin(); }
        iterator end() {
    
     return _ht.end(); };
        pair<iterator, bool> insert(const pair<K, V>& kv) {
    
     return _ht.Insert(kv); }
        bool erase(const K& key) {
    
     return _ht.Erase(key); }
        V& operator[](const K& key)
        {
    
    
            auto ret = insert(make_pair(key, V()));
            return ret.first._node->_data.second;
        }
        //Find(const K& key);

    private:
        LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> _ht;

         
    };

unordered_set的封装

template<class K, class HashFunc = Hash<K>>
    class my_unordered_set
    {
    
    
    public:
        struct SetKeyOfT
        {
    
    
            const K& operator()(const K& key) {
    
    
                return key;
            }
        };
        typedef typename LinkHash::HashTable<K, K, SetKeyOfT, HashFunc>::iterator iterator;
        iterator begin() {
    
     return _ht.begin(); }
        iterator end() {
    
     return _ht.end(); };
        pair<iterator, bool> insert(const K& key) {
    
     return _ht.Insert(key); }
        bool erase(const K& key) {
    
     return _ht.Erase(key); }
   
        //Find(const K& key);

    private:
        LinkHash::HashTable<K, K, SetKeyOfT, HashFunc> _ht;
    };

在实际编写代码的过程中我们会发现，HashTable包含迭代器，迭代器又包含了HashTable，两者总是一前一后，所以我们必须要将两个结构体的声明写在前面，让编译器知道存在这样的类型

源码：

LinkHash.h

#pragma once

#include <iostream>
#include <vector>
#include <string>
using namespace std;

/// Default hash functor: converts the key itself to size_t, so it suits keys
/// that are convertible to size_t (e.g. integers).
template<class K>
struct Hash
{
    size_t operator()(const K& key) const
    {
        return static_cast<size_t>(key);
    }
};

/// String hash: for each character, add it and multiply by 31.
/// The accumulator is size_t, so long keys wrap around with well-defined
/// unsigned arithmetic (an int accumulator overflows, which is undefined
/// behaviour). Characters are read as unsigned char so the result does not
/// depend on whether plain char is signed.
template<>
struct Hash<std::string>
{
    size_t operator()(const std::string& key) const
    {
        size_t hash = 0;
        for (unsigned char ch : key)
        {
            hash += ch;
            hash *= 31;
        }
        return hash;
    }
};
namespace LinkHash
{
    // Forward declaration: HashIterator stores a HashTable pointer, and
    // HashTable in turn exposes HashIterator as its iterator type.
    template<class K, class T, class KeyOfT, class HashFunc>
    class HashTable;

    /// One node of a bucket's singly linked chain.
    template<class T>
    struct HashNode
    {
        HashNode(const T& data)
            : _data(data)
            , _next(nullptr)
        {
        }

        T _data;             // the stored element
        HashNode<T>* _next;  // next node in the same bucket, or nullptr
    };

    /// Forward iterator over every element of a HashTable.
    /// It keeps the current node plus a pointer to the table, so that when a
    /// bucket's chain ends it can continue with the next non-empty bucket.
    template<class K, class T, class Ref, class Ptr, class KeyOfT, class HashFunc>
    struct HashIterator
    {
        typedef HashNode<T> Node;
        typedef HashIterator<K, T, Ref, Ptr, KeyOfT, HashFunc> Self;

        HashIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
            : _node(node)
            , _ht(pht)
        {
        }

        /// Pre-increment: moves to the next element in bucket order.
        Self& operator++()
        {
            // Incrementing end() is a no-op instead of a null dereference.
            if (_node == nullptr)
                return *this;

            // Case 1: another node follows in the current chain.
            if (_node->_next)
            {
                _node = _node->_next;
                return *this;
            }

            // Case 2: chain exhausted. Recompute the current bucket index from
            // the element's key and scan forward for a non-empty bucket.
            KeyOfT kot;
            HashFunc hs;
            const size_t bucketCount = _ht->_tables.size();
            size_t index = hs(kot(_node->_data)) % bucketCount;

            _node = nullptr;  // stays null (== end()) if nothing follows
            while (++index < bucketCount)
            {
                if (_ht->_tables[index])
                {
                    _node = _ht->_tables[index];
                    break;
                }
            }
            return *this;
        }

        // Iterators compare by the node they point at.
        bool operator!=(const Self& ite) const { return _node != ite._node; }
        bool operator==(const Self& ite) const { return _node == ite._node; }

        Ref operator*() const { return _node->_data; }
        Ptr operator->() const { return &_node->_data; }

        Node* _node;                             // current node; nullptr is end()
        HashTable<K, T, KeyOfT, HashFunc>* _ht;  // table being iterated
    };

    /// Separate-chaining hash table.
    /// K: key type. T: stored element type. KeyOfT: functor extracting the key
    /// from a T. HashFunc: functor mapping a key to size_t.
    template<class K, class T, class KeyOfT, class HashFunc>
    class HashTable
    {
    public:
        // The iterator reads the private _tables array. The friend template
        // uses its own parameter names: reusing K/T/KeyOfT/HashFunc would
        // redeclare the enclosing template's parameters, which standard C++
        // rejects.
        template<class K2, class T2, class Ref2, class Ptr2, class KeyOfT2, class HashFunc2>
        friend struct HashIterator;

        typedef HashNode<T> Node;
        typedef HashTable<K, T, KeyOfT, HashFunc> Self;
        typedef HashIterator<K, T, T&, T*, KeyOfT, HashFunc> iterator;

        // Needed explicitly: declaring the copy constructor suppresses the
        // implicitly generated default constructor.
        HashTable()
        {
        }

        /// Inserts `data`; the bool is false when its key already exists.
        std::pair<iterator, bool> Insert(const T& data);
        /// Returns an iterator to the element with `key`, or end().
        iterator Find(const K& key);
        /// Removes the element with `key`; returns whether one was removed.
        bool Erase(const K& key);
        /// Removes the element `it` points to; false for end().
        bool Erase(iterator it);

        /// Deep copy: same bucket count, same order inside each chain.
        HashTable(const HashTable& ht)
        {
            _tables.resize(ht._tables.size());
            for (size_t i = 0; i < _tables.size(); i++)
            {
                // `link` addresses the pointer that receives the next copy:
                // first the bucket head, then the previous copy's _next.
                Node** link = &_tables[i];
                for (Node* cur = ht._tables[i]; cur; cur = cur->_next)
                {
                    *link = new Node(cur->_data);
                    link = &(*link)->_next;
                    ++_n;
                }
            }
        }

        /// Copy-and-swap: `ht` is already a copy of the right-hand side; after
        /// the swap it owns the old contents and frees them on destruction.
        Self& operator=(Self ht)
        {
            _tables.swap(ht._tables);
            std::swap(_n, ht._n);
            return *this;
        }

        ~HashTable()
        {
            for (size_t i = 0; i < _tables.size(); i++)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
        }

        /// Iterator to the head of the first non-empty bucket, or end().
        iterator begin()
        {
            for (size_t i = 0; i < _tables.size(); i++)
            {
                if (_tables[i])
                    return iterator(_tables[i], this);
            }
            return iterator(nullptr, this);
        }

        iterator end()
        {
            return iterator(nullptr, this);
        }

    private:
        std::vector<Node*> _tables;  // bucket array; each slot heads a chain
        size_t _n = 0;               // number of stored elements
    };

    template<class K, class T, class KeyOfT, class HashFunc>
    std::pair<typename HashTable<K, T, KeyOfT, HashFunc>::iterator, bool>
    HashTable<K, T, KeyOfT, HashFunc>::Insert(const T& data)
    {
        KeyOfT kot;
        HashFunc hs;

        // One lookup is enough; Find already copes with an empty table.
        iterator existing = Find(kot(data));
        if (existing._node)
            return std::make_pair(existing, false);

        // Grow once the load factor reaches 1.
        if (_n >= _tables.size())
        {
            const size_t newSize = _tables.empty() ? 10 : 2 * _tables.size();
            std::vector<Node*> newTables(newSize, nullptr);
            for (size_t i = 0; i < _tables.size(); i++)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    // Re-bucket against the NEW size; using the old size would
                    // leave nodes in buckets Find never looks at.
                    const size_t index = hs(kot(cur->_data)) % newSize;
                    cur->_next = newTables[index];  // relink the node, no copy
                    newTables[index] = cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
            _tables.swap(newTables);
        }

        // Head insertion into the target bucket.
        const size_t index = hs(kot(data)) % _tables.size();
        Node* newNode = new Node(data);
        newNode->_next = _tables[index];
        _tables[index] = newNode;
        ++_n;
        return std::make_pair(iterator(newNode, this), true);
    }

    template<class K, class T, class KeyOfT, class HashFunc>
    typename HashTable<K, T, KeyOfT, HashFunc>::iterator
    HashTable<K, T, KeyOfT, HashFunc>::Find(const K& key)
    {
        if (_tables.empty())
            return iterator(nullptr, this);

        HashFunc hs;
        KeyOfT kot;
        const size_t index = hs(key) % _tables.size();
        Node* cur = _tables[index];
        while (cur && kot(cur->_data) != key)
        {
            cur = cur->_next;
        }
        return iterator(cur, this);
    }

    template<class K, class T, class KeyOfT, class HashFunc>
    bool HashTable<K, T, KeyOfT, HashFunc>::Erase(const K& key)
    {
        // An empty table has no buckets; the modulo below would divide by zero.
        if (_tables.empty())
            return false;

        HashFunc hs;
        KeyOfT kot;
        const size_t index = hs(key) % _tables.size();

        Node* prev = nullptr;
        Node* cur = _tables[index];
        while (cur && kot(cur->_data) != key)
        {
            prev = cur;
            cur = cur->_next;
        }
        if (cur == nullptr)
            return false;

        // Unlink: either the bucket head or the predecessor skips over cur.
        if (prev == nullptr)
            _tables[index] = cur->_next;
        else
            prev->_next = cur->_next;

        delete cur;
        --_n;  // keep the element count (and thus the load factor) accurate
        return true;
    }

    template<class K, class T, class KeyOfT, class HashFunc>
    bool HashTable<K, T, KeyOfT, HashFunc>::Erase(iterator it)
    {
        if (it._node == nullptr)
            return false;
        KeyOfT kot;
        return Erase(kot((it._node)->_data));
    }

    // Placeholder test. `inline` because this function is defined in a header
    // and would otherwise be multiply defined when included from several
    // translation units. The disabled code below targets an earlier
    // two-parameter HashTable interface.
    inline void TestHashTable()
    {
        //HashTable<int, int> ht;
        //int arr[] = { 1, 11, 21, 31, 5, 6, 7, 8, 9,10, 44 };
        //for (auto e : arr)
        //{
        //    ht.Insert(make_pair(e, e));
        //}
        //cout << endl;
    }
}

unordered_map.h

#pragma once
#include "LinkHash.h"

namespace clx
{
    
    
    template<class K, class V, class HashFunc = Hash<K>>
    class my_unordered_map
    {
    
    
    public:
        struct MapKeyOfT
        {
    
    
            const K& operator()(const pair<K, V>& kv) {
    
    
                return kv.first;
            }
        };
        typedef typename LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc>::iterator iterator;
        iterator begin() {
    
     return _ht.begin(); }
        iterator end() {
    
     return _ht.end(); };
        pair<iterator, bool> insert(const pair<K, V>& kv) {
    
     return _ht.Insert(kv); }
        bool erase(const K& key) {
    
     return _ht.Erase(key); }
        V& operator[](const K& key)
        {
    
    
            auto ret = insert(make_pair(key, V()));
            return ret.first._node->_data.second;
        }
        //Find(const K& key);

    private:
        LinkHash::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> _ht;

         
    };
    void my_unordered_map_test()
    {
    
    
        my_unordered_map<int, int> um1;
        um1.insert(make_pair(1, 1));
        um1.insert(make_pair(11, 11));
        um1.insert(make_pair(21, 21));
        um1.insert(make_pair(31, 31));
        my_unordered_map<int, int> um2(um1);
        auto it = um1.begin();
        
        while (it != um1.end())
        {
    
    
            cout << it->first << it->second << endl;
            ++it;
        }

        it = um2.begin();

        while (it != um2.end())
        {
    
    
            cout << it->first << it->second << endl;
            ++it;
        }
        cout << endl;
    }
}

unordered_set.h

#pragma once
#include "LinkHash.h"

namespace clx
{
    
    
    template<class K, class HashFunc = Hash<K>>
    class my_unordered_set
    {
    
    
    public:
        struct SetKeyOfT
        {
    
    
            const K& operator()(const K& key) {
    
    
                return key;
            }
        };
        typedef typename LinkHash::HashTable<K, K, SetKeyOfT, HashFunc>::iterator iterator;
        iterator begin() {
    
     return _ht.begin(); }
        iterator end() {
    
     return _ht.end(); };
        pair<iterator, bool> insert(const K& key) {
    
     return _ht.Insert(key); }
        bool erase(const K& key) {
    
     return _ht.Erase(key); }
   
        //Find(const K& key);

    private:
        LinkHash::HashTable<K, K, SetKeyOfT, HashFunc> _ht;
    };
}

test.h

#include "unordered_map.h"

// Entry point: runs the my_unordered_map demo from unordered_map.h.
int main()
{
    clx::my_unordered_map_test();
    // Falling off the end of main is equivalent to `return 0;`.
}