HashMap底层原理学习（JDK1.8）

1、简介

hashMap所继承或者实现的接口
在这里插入图片描述
HashMap中有个Node的内部类，这其实就是一个单链表的结构

和TreeNode，传说中的红黑树

其他成员：
transient Node<K,V>[] table; 存储hash节点的数组
transient Set<Map.Entry<K,V>> entrySet; 缓存entrySet()返回的键值对视图（Set结构）；keySet() 和 values()的视图则缓存在父类AbstractMap的keySet、values字段中。
transient int size; 数据总数
transient int modCount; 修改次数
int threshold;需要进行resize的阈值，一般为capacity * load factor
final float loadFactor;hash表的装载因子
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; 默认容量，16
static final int MAXIMUM_CAPACITY = 1 << 30; 最大容量，2^30
static final float DEFAULT_LOAD_FACTOR = 0.75f; 默认装载因子
static final int TREEIFY_THRESHOLD = 8;将单链表结构转为树结构的阈值，也就是说向一个已有8个节点的链表再追加新节点（链表长度超过8）时，才会尝试将单链表转为树结构存储（同时还要满足下面MIN_TREEIFY_CAPACITY的条件，否则只扩容）
static final int UNTREEIFY_THRESHOLD = 6;将树结构转回单链表的阈值
static final int MIN_TREEIFY_CAPACITY = 64;进行转树结构操作的最小容量

2、构造方法

无参构造：仅指定默认装载因子

/**
 * Creates an empty map. Only the load factor is set here (to
 * {@code DEFAULT_LOAD_FACTOR}); every other field keeps its default value,
 * so no table is allocated by this constructor.
 */
public HashMap() {
        this.loadFactor = DEFAULT_LOAD_FACTOR; // all other fields defaulted
    }

可指定初始容量，装载因子也是默认0.75

/**
 * Creates an empty map with the requested initial capacity and the default
 * load factor by delegating to the two-argument constructor.
 *
 * @param initialCapacity the requested initial capacity
 */
public HashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

可指定初始容量和装载因子，初始容量不能超过MAXIMUM_CAPACITY（超过时按MAXIMUM_CAPACITY处理）。tableSizeFor会找出大于等于初始容量的最小的2的幂，这个值暂时保存在threshold中，第一次resize时会被用作table的容量。

/**
 * Creates an empty map with the requested initial capacity and load factor.
 * The capacity is capped at {@code MAXIMUM_CAPACITY}, rounded by
 * {@code tableSizeFor}, and the result is stored in {@code threshold}.
 *
 * @param initialCapacity the requested initial capacity
 * @param loadFactor the load factor
 * @throws IllegalArgumentException if {@code initialCapacity} is negative, or
 *         if {@code loadFactor} is NaN or not greater than zero
 */
public HashMap(int initialCapacity, float loadFactor) {
        // The capacity is validated before the load factor, so a bad capacity is reported first.
        if (initialCapacity < 0) {
            throw new IllegalArgumentException("Illegal initial capacity: " +
                                               initialCapacity);
        }
        if (Float.isNaN(loadFactor) || loadFactor <= 0) {
            throw new IllegalArgumentException("Illegal load factor: " +
                                               loadFactor);
        }
        int cappedCapacity = Math.min(initialCapacity, MAXIMUM_CAPACITY);
        this.loadFactor = loadFactor;
        this.threshold = tableSizeFor(cappedCapacity);
    }
    /**
     * Rounds {@code cap} up to a power of two: the highest set bit of
     * {@code cap - 1} is copied into every lower bit position and one is added.
     *
     * @param cap the requested capacity
     * @return 1 if the smeared value is negative, {@code MAXIMUM_CAPACITY} if it
     *         is at least {@code MAXIMUM_CAPACITY}, otherwise the smeared value plus one
     */
    static final int tableSizeFor(int cap) {
        int bits = cap - 1;
        // Shifts of 1, 2, 4, 8 and 16 together cover all 32 bits of an int.
        for (int shift = 1; shift <= 16; shift <<= 1) {
            bits |= bits >>> shift;
        }
        if (bits < 0) {
            return 1;
        }
        if (bits >= MAXIMUM_CAPACITY) {
            return MAXIMUM_CAPACITY;
        }
        return bits + 1;
    }

根据Map构建一个新的HashMap，装载因子依然默认0.75，根据map的大小来计算阈值。

/**
 * Creates a map with the default load factor and copies every mapping of
 * {@code m} into it through {@code putMapEntries} (called with
 * {@code evict = false}).
 *
 * @param m the map whose mappings are copied
 */
public HashMap(Map<? extends K, ? extends V> m) {
        this.loadFactor = DEFAULT_LOAD_FACTOR;
        putMapEntries(m, false);
    }
    /**
     * Copies every mapping of {@code m} into this map. Does nothing when
     * {@code m} is empty.
     *
     * @param m the source map
     * @param evict forwarded unchanged to {@code putVal}
     */
    final void putMapEntries(Map<? extends K, ? extends V> m, boolean evict) {
        final int incoming = m.size();
        if (incoming <= 0) {
            return;
        }
        if (table != null) {
            // Table already allocated: grow once up front if the source is larger than the threshold.
            if (incoming > threshold) {
                resize();
            }
        } else {
            // No table yet: pre-size by storing the required capacity in threshold.
            float needed = ((float)incoming / loadFactor) + 1.0F;
            int target = MAXIMUM_CAPACITY;
            if (needed < (float)MAXIMUM_CAPACITY) {
                target = (int)needed;
            }
            if (target > threshold) {
                threshold = tableSizeFor(target);
            }
        }
        for (Map.Entry<? extends K, ? extends V> entry : m.entrySet()) {
            K entryKey = entry.getKey();
            V entryValue = entry.getValue();
            putVal(hash(entryKey), entryKey, entryValue, false, evict);
        }
    }

3、put方法

/**
 * Associates {@code value} with {@code key} by delegating to {@code putVal}
 * with the key's hash, {@code onlyIfAbsent = false} and {@code evict = true}.
 *
 * @param key the key
 * @param value the value to store
 * @return the value returned by {@code putVal}: the previous value when the
 *         key was already present, otherwise null
 */
public V put(K key, V value) {
        return putVal(hash(key), key, value, false, true);
    }

首先是计算key的hash值

/**
 * Computes the hash used for bucket selection: 0 for a null key, otherwise
 * the key's {@code hashCode()} XORed with itself shifted right (unsigned) by 16 bits.
 *
 * @param key the key, may be null
 * @return the spread hash
 */
static final int hash(Object key) {
        int h;
        // XOR the high 16 bits into the low 16 bits so both halves influence the result
        return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
    }

(h = key.hashCode()) ^ (h >>> 16)：h >>> 16是将hashCode无符号右移16位的意思，然后将hashCode自身和它右移16位后的结果进行异或，使得hashCode的高16位和低16位均参与hash的计算，使得key的hash值更加随机化，也就是让整个hash表更加散列化。
下标的计算(n - 1) & hash：table的长度都是2的幂，假设为16，则n-1为1111，与hash进行与操作，结果只会保留最后四位，这其实与对n取模效果是一样的，但是计算效率会比对n取模更快。这也是为什么每次扩容都要保证table的长度为2的幂的原因。
接下来看putVal

/**
 * Core insertion routine behind {@code put}: stores {@code value} under
 * {@code key} in the bucket selected by {@code (n - 1) & hash}.
 *
 * @param hash hash of the key, as computed by the caller
 * @param key the key
 * @param value the value to store
 * @param onlyIfAbsent if true, an existing non-null value is left unchanged
 * @param evict forwarded to {@code afterNodeInsertion}; not otherwise used here
 * @return the previous value (possibly null) when the key was already
 *         present, otherwise null
 */
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
                   boolean evict) {
        Node<K,V>[] tab; Node<K,V> p; int n, i;
        if ((tab = table) == null || (n = tab.length) == 0)
            n = (tab = resize()).length;// the table is empty on the first put, so a resize always happens here
        if ((p = tab[i = (n - 1) & hash]) == null)  // slot index is i = (n - 1) & hash; an empty slot means no collision, so the node goes straight into the array
            tab[i] = newNode(hash, key, value, null);
        else {
            Node<K,V> e; K k;
            if (p.hash == hash &&
                ((k = p.key) == key || (key != null && key.equals(k)))) // the head node has the same hash and an equal key, so p is the node whose value gets overwritten
                e = p;
            else if (p instanceof TreeNode)  // the bin is already a tree, so insert through the tree path
                e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
            else {  // collision in a bin that is still a linked list: walk it and append at the tail
                for (int binCount = 0; ; ++binCount) {
                    if ((e = p.next) == null) {
                        p.next = newNode(hash, key, value, null);
                        if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                            treeifyBin(tab, hash); // the chain is long enough to qualify, so try to convert this bin into a tree
                        break;
                    }
                    if (e.hash == hash &&
                        ((k = e.key) == key || (key != null && key.equals(k))))
                        break;
                    p = e;
                }
            }
            if (e != null) { // existing mapping for key: update its value
                V oldValue = e.value;
                if (!onlyIfAbsent || oldValue == null)
                    e.value = value;
                afterNodeAccess(e); // hook defined outside this excerpt
                return oldValue;
            }
        }
        // Reached only when a new node was added (the update path returned above).
        ++modCount;
        if (++size > threshold)
            resize();
        afterNodeInsertion(evict); // hook defined outside this excerpt
        return null;
    }

下面看treeifyBin，可看出并不是长度超过8就转树，进一步判断若当前table的大小小于MIN_TREEIFY_CAPACITY则只进行扩容。

/**
 * Converts the linked-list bin that {@code hash} maps to into a tree bin,
 * unless the table is null or shorter than {@code MIN_TREEIFY_CAPACITY},
 * in which case the table is resized instead.
 *
 * @param tab the table
 * @param hash hash identifying the bin to convert
 */
final void treeifyBin(Node<K,V>[] tab, int hash) {
        int n, index; Node<K,V> e;
        if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY)
            resize(); // table too small: grow it rather than treeify
        else if ((e = tab[index = (n - 1) & hash]) != null) {
            // hd/tl: head and tail of the rebuilt chain of TreeNodes
            TreeNode<K,V> hd = null, tl = null;
            do {
                // replace each Node by a TreeNode, keeping the list order and linking prev/next
                TreeNode<K,V> p = replacementTreeNode(e, null);
                if (tl == null)
                    hd = p;
                else {
                    p.prev = tl;
                    tl.next = p;
                }
                tl = p;
            } while ((e = e.next) != null);
            if ((tab[index] = hd) != null)
                hd.treeify(tab); // build the actual tree from the linked TreeNodes
        }
    }

4、扩容

当第一次put、size超过阈值，或者某条冲突的链表长度超过8但table的长度小于MIN_TREEIFY_CAPACITY（64）时，会进行扩容resize

/**
 * Allocates the table on first use or doubles its capacity, then moves every
 * existing node into the new table. Because the capacity doubles, each node
 * either keeps its index {@code j} or moves to {@code j + oldCap}.
 *
 * @return the table in use after the call; this is the old table, unchanged,
 *         when it was already at {@code MAXIMUM_CAPACITY}
 */
final Node<K,V>[] resize() {
        Node<K,V>[] oldTab = table;
        int oldCap = (oldTab == null) ? 0 : oldTab.length;
        int oldThr = threshold;
        int newCap, newThr = 0;
        if (oldCap > 0) {
            if (oldCap >= MAXIMUM_CAPACITY) {  // already at MAXIMUM_CAPACITY: only the threshold is raised to Integer.MAX_VALUE, the table itself is left untouched
                threshold = Integer.MAX_VALUE;
                return oldTab;
            }
            else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
                     oldCap >= DEFAULT_INITIAL_CAPACITY)  // doubled capacity is still below MAXIMUM_CAPACITY and the old capacity is at least the default 16: double the threshold as well
                newThr = oldThr << 1; // double threshold
        }
        else if (oldThr > 0) // initial capacity was placed in threshold: oldCap is 0 before the first put, so a non-zero old threshold becomes the new capacity
            newCap = oldThr;
        else {               // zero initial threshold signifies using defaults: newCap = 16, newThr = 16 * 0.75 = 12
            newCap = DEFAULT_INITIAL_CAPACITY;
            newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
        }
        // No threshold chosen above: derive it from newCap * loadFactor, saturating at Integer.MAX_VALUE.
        if (newThr == 0) {
            float ft = (float)newCap * loadFactor;
            newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
                      (int)ft : Integer.MAX_VALUE);
        }
        threshold = newThr;
        @SuppressWarnings({"rawtypes","unchecked"})
            Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
        table = newTab;
        if (oldTab != null) {
            for (int j = 0; j < oldCap; ++j) {  // move the old table's nodes into the new table
                Node<K,V> e;
                if ((e = oldTab[j]) != null) {
                    oldTab[j] = null;
                    if (e.next == null)  // single node, no collision: place it at the index recomputed for newCap
                        newTab[e.hash & (newCap - 1)] = e;
                    else if (e instanceof TreeNode)
                        ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
                    else { // preserve order
                        // lo list: nodes that stay at index j; hi list: nodes that move to j + oldCap
                        Node<K,V> loHead = null, loTail = null;
                        Node<K,V> hiHead = null, hiTail = null;
                        Node<K,V> next;
                        do {
                            next = e.next;
                            if ((e.hash & oldCap) == 0) {// test the next-higher hash bit (the oldCap bit): 0 means the index is unchanged
                                if (loTail == null)
                                    loHead = e;
                                else
                                    loTail.next = e;
                                loTail = e;
                            }
                            else {
                                if (hiTail == null)
                                    hiHead = e;
                                else
                                    hiTail.next = e;
                                hiTail = e;
                            }
                        } while ((e = next) != null);
                        if (loTail != null) {
                            loTail.next = null;
                            newTab[j] = loHead;
                        }
                        if (hiTail != null) {
                            hiTail.next = null;
                            newTab[j + oldCap] = hiHead;
                        }
                    }
                }
            }
        }
        return newTab;
    }

例如将原大小为16的扩容为32：
原下标为5的可能101&1111=5也可能是10101&1111=5
扩容时重新计算下标101&11111=5，而10101&11111=21
当前处理的下标为j，旧容量为oldCap，也就是说当处理j这个位置对应的链表时，这些Node要么还留在j这个下标不变，要么放到j+oldCap，只有这两种情况。
resize中直接将hash和oldCap进行与操作，101&10000=0，10101&10000=10000，简单直接地判断出是否需要改变当前节点的位置。
原位置若是链表直接这样分割即可，但若是树就比较复杂了。

 		/**
         * Splits nodes in a tree bin into lower and upper tree bins,
         * or untreeifies if now too small. Called only from resize;
         * see above discussion about split bits and indices.
         *
         * The lower list is stored at {@code tab[index]} and the upper list at
         * {@code tab[index + bit]}.
         *
         * @param map the map
         * @param tab the table for recording bin heads
         * @param index the index of the table being split
         * @param bit the bit of hash to split on
         */
        final void split(HashMap<K,V> map, Node<K,V>[] tab, int index, int bit) {
            TreeNode<K,V> b = this;
            // Relink into lo and hi lists, preserving order
            TreeNode<K,V> loHead = null, loTail = null;
            TreeNode<K,V> hiHead = null, hiTail = null;
            // lc / hc count the nodes placed in the lo and hi lists
            int lc = 0, hc = 0;
            for (TreeNode<K,V> e = b, next; e != null; e = next) {
                next = (TreeNode<K,V>)e.next;
                e.next = null; // detach; the node is re-linked into one of the two lists below
                if ((e.hash & bit) == 0) { // split bit clear: node belongs to the lo list
                    if ((e.prev = loTail) == null)
                        loHead = e;
                    else
                        loTail.next = e;
                    loTail = e;
                    ++lc;
                }
                else { // split bit set: node belongs to the hi list
                    if ((e.prev = hiTail) == null)
                        hiHead = e;
                    else
                        hiTail.next = e;
                    hiTail = e;
                    ++hc;
                }
            }

            if (loHead != null) {
                if (lc <= UNTREEIFY_THRESHOLD) // few enough nodes: convert back to a plain list
                    tab[index] = loHead.untreeify(map);
                else {
                    tab[index] = loHead;
                    if (hiHead != null) // (else is already treeified)
                        loHead.treeify(tab);
                }
            }
            if (hiHead != null) {
                if (hc <= UNTREEIFY_THRESHOLD) // few enough nodes: convert back to a plain list
                    tab[index + bit] = hiHead.untreeify(map);
                else {
                    tab[index + bit] = hiHead;
                    if (loHead != null) // nodes were removed from this list, so the tree must be rebuilt
                        hiHead.treeify(tab);
                }
            }
        }

TreeNode是继承自LinkedHashMap.Entry的，依然保留有链表的特点。split先按照链表的处理方式重新计算下标分割高低位。若最终剩下的该下标位的节点数量小于等于UNTREEIFY_THRESHOLD，进行树转链表的操作untreeify，否则再次进行转树的操作treeify。

5、链表转树与树转链表

链表转树
前面已经说过链表转树之前会先判断table的大小，来决定是扩容还是转树。

/**
 * Converts the linked-list bin that {@code hash} maps to into a tree bin,
 * unless the table is null or shorter than {@code MIN_TREEIFY_CAPACITY},
 * in which case the table is resized instead.
 *
 * @param tab the table
 * @param hash hash identifying the bin to convert
 */
final void treeifyBin(Node<K,V>[] tab, int hash) {
        int n, index; Node<K,V> e;
        if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY)
            resize(); // table too small: grow it rather than treeify
        else if ((e = tab[index = (n - 1) & hash]) != null) {
            // hd/tl: head and tail of the rebuilt chain of TreeNodes
            TreeNode<K,V> hd = null, tl = null;
            do {
                // replace each Node by a TreeNode, keeping the list order and linking prev/next
                TreeNode<K,V> p = replacementTreeNode(e, null);
                if (tl == null)
                    hd = p;
                else {
                    p.prev = tl;
                    tl.next = p;
                }
                tl = p;
            } while ((e = e.next) != null);
            if ((tab[index] = hd) != null)
                hd.treeify(tab); // build the actual tree from the linked TreeNodes
        }
    }

先将链表的Node变成TreeNode，其实这时候还是保留着链表的特点，然后调用TreeNode的treeify方法进行真正的转树操作。

	/**
	 * Builds a tree from the TreeNodes reachable from this node through
	 * {@code next}. Nodes are ordered by hash; equal hashes fall back to
	 * {@code Comparable} comparison and finally to {@code tieBreakOrder}.
	 * Every insertion is followed by {@code balanceInsertion}.
	 *
	 * @param tab the table, passed on to {@code moveRootToFront}
	 */
	final void treeify(Node<K,V>[] tab) {
            TreeNode<K,V> root = null;
            // this is a TreeNode method, so "this" is the current tree node (the start of the chain)
            for (TreeNode<K,V> x = this, next; x != null; x = next) {
                next = (TreeNode<K,V>)x.next;
                x.left = x.right = null;
                // no root yet: the current node becomes the root, and the root is black
                if (root == null) {
                    x.parent = null;
                    x.red = false;
                    root = x;
                }
                else {
                    K k = x.key;
                    int h = x.hash;
                    Class<?> kc = null;
                    // every insertion searches for its position starting from the root
                    for (TreeNode<K,V> p = root;;) {
                        int dir, ph;
                        K pk = p.key;
                        // current node goes to the left of p
                        if ((ph = p.hash) > h)
                            dir = -1;
                        // current node goes to the right of p
                        else if (ph < h)
                            dir = 1;
                        // equal hashes: compare as Comparable when possible, otherwise use tieBreakOrder
                        else if ((kc == null &&
                                  (kc = comparableClassFor(k)) == null) ||
                                 (dir = compareComparables(kc, k, pk)) == 0)
                            dir = tieBreakOrder(k, pk);

                        TreeNode<K,V> xp = p;
                        // descend; when the child slot is empty, attach x there and rebalance
                        if ((p = (dir <= 0) ? p.left : p.right) == null) {
                            x.parent = xp;
                            if (dir <= 0)
                                xp.left = x;
                            else
                                xp.right = x;
                            root = balanceInsertion(root, x);
                            break;
                        }
                    }
                }
            }
            // presumably makes root the first node of its bin in tab; helper not shown in this excerpt
            moveRootToFront(tab, root);
        }

众所周知，红黑树是一棵搜索树，节点之间必须可以比较大小，才能以O(log n)的效率进行搜索。这里首先按key的hash值进行排序；若hash值相同，则通过comparableClassFor判断key的类是否实现了Comparable接口，若实现了则调用其compareTo方法进行比较；若没有实现Comparable接口，或者compareTo的结果为0，则调用tieBreakOrder方法来决定顺序：

	/**
	 * Last-resort ordering for two keys that could not be ordered otherwise.
	 * Non-null arguments are ordered by class name first; if either argument
	 * is null or the class names are equal, the identity hash codes decide.
	 *
	 * @param a first object, may be null
	 * @param b second object, may be null
	 * @return the non-zero class-name comparison result, otherwise -1 when
	 *         a's identity hash code is not greater than b's, else 1; never 0
	 */
	static int tieBreakOrder(Object a, Object b) {
            if (a != null && b != null) {
                String nameOfA = a.getClass().getName();
                String nameOfB = b.getClass().getName();
                int byClassName = nameOfA.compareTo(nameOfB);
                if (byClassName != 0) {
                    return byClassName;
                }
            }
            // Ties on identity hash deliberately resolve to -1, so the result is never 0.
            if (System.identityHashCode(a) <= System.identityHashCode(b)) {
                return -1;
            }
            return 1;
        }

每个key在HashMap中都是唯一的，插入时必须确定往左还是往右，所以dir只能大于0或小于0，不能为0。tieBreakOrder先比较两个对象的类名，类名是字符串对象，就按字符串的比较规则比较。如果有对象为null，或者两个对象的类名相同，那么调用本地方法System.identityHashCode分别取得两个对象的identityHashCode，再进行比较，两者相等的话也返回-1。
插入当前节点x后，红黑树的特性可能被破坏，因此需要进行一些变色、左旋、右旋的操作让树重新成为一棵红黑树。

	/**
	 * Restores the tree's red/black coloring after node {@code x} has been
	 * linked in, using recoloring and left/right rotations.
	 *
	 * @param root the current root
	 * @param x the newly inserted node
	 * @return the root after rebalancing (may differ from {@code root})
	 */
	static <K,V> TreeNode<K,V> balanceInsertion(TreeNode<K,V> root,
                                                    TreeNode<K,V> x) {
            x.red = true; // a freshly inserted node starts out red
            // xp = parent of x, xpp = grandparent, xppl / xppr = grandparent's left / right child
            for (TreeNode<K,V> xp, xpp, xppl, xppr;;) {
                if ((xp = x.parent) == null) {
                    // x has no parent: it is the root, which is colored black
                    x.red = false;
                    return x;
                }
                else if (!xp.red || (xpp = xp.parent) == null)
                    // black parent, or no grandparent: nothing left to fix
                    return root;
                if (xp == (xppl = xpp.left)) {
                    // parent is the grandparent's left child, so the uncle is xppr
                    if ((xppr = xpp.right) != null && xppr.red) {
                        // red uncle: recolor and continue the fix-up from the grandparent
                        xppr.red = false;
                        xp.red = false;
                        xpp.red = true;
                        x = xpp;
                    }
                    else {
                        // black or missing uncle: rotate
                        if (x == xp.right) {
                            // x is a right child: rotate left at the parent first, then re-read parent and grandparent
                            root = rotateLeft(root, x = xp);
                            xpp = (xp = x.parent) == null ? null : xp.parent;
                        }
                        if (xp != null) {
                            xp.red = false;
                            if (xpp != null) {
                                xpp.red = true;
                                root = rotateRight(root, xpp);
                            }
                        }
                    }
                }
                else {
                    // mirror image: parent is the grandparent's right child, so the uncle is xppl
                    if (xppl != null && xppl.red) {
                        // red uncle: recolor and continue the fix-up from the grandparent
                        xppl.red = false;
                        xp.red = false;
                        xpp.red = true;
                        x = xpp;
                    }
                    else {
                        // black or missing uncle: rotate
                        if (x == xp.left) {
                            // x is a left child: rotate right at the parent first, then re-read parent and grandparent
                            root = rotateRight(root, x = xp);
                            xpp = (xp = x.parent) == null ? null : xp.parent;
                        }
                        if (xp != null) {
                            xp.red = false;
                            if (xpp != null) {
                                xpp.red = true;
                                root = rotateLeft(root, xpp);
                            }
                        }
                    }
                }
            }
        }

红黑树原理参考
树转链表就比较简单了，直接将TreeNode变成Node即可。

	/**
	 * Produces a plain linked list holding a replacement node (obtained from
	 * {@code map.replacementNode}) for every node reachable from this one
	 * through {@code next}, in the same order.
	 *
	 * @param map the map that supplies the replacement nodes
	 * @return the head of the new list
	 */
	final Node<K,V> untreeify(HashMap<K,V> map) {
            Node<K,V> head = null;
            Node<K,V> tail = null;
            Node<K,V> cursor = this;
            while (cursor != null) {
                Node<K,V> plain = map.replacementNode(cursor, null);
                if (tail != null) {
                    tail.next = plain;
                } else {
                    head = plain;
                }
                tail = plain;
                cursor = cursor.next;
            }
            return head;
        }

6、get方法

先计算key的hash值

/**
 * Looks up the value mapped to {@code key}.
 *
 * @param key the key to look up
 * @return the value of the node found by {@code getNode}, or null when no
 *         node is found
 */
public V get(Object key) {
        Node<K,V> found = getNode(hash(key), key);
        if (found == null) {
            return null;
        }
        return found.value;
    }

getNode

/**
 * Finds the node holding {@code key} in the bucket selected by
 * {@code (n - 1) & hash}.
 *
 * @param hash hash of the key
 * @param key the key to look up
 * @return the matching node, or null if the table is empty or no node matches
 */
final Node<K,V> getNode(int hash, Object key) {
        Node<K,V>[] buckets = table;
        if (buckets == null || buckets.length <= 0) {
            return null;
        }
        Node<K,V> head = buckets[(buckets.length - 1) & hash];
        if (head == null) {
            return null;
        }
        // Always check the head node first: if it already holds the requested key, return it directly.
        K headKey;
        if (head.hash == hash &&
            ((headKey = head.key) == key || (key != null && key.equals(headKey)))) {
            return head;
        }
        Node<K,V> node = head.next;
        if (node == null) {
            return null;
        }
        if (head instanceof TreeNode) {
            // Tree bin: search it as a red-black tree, O(log n).
            return ((TreeNode<K,V>)head).getTreeNode(hash, key);
        }
        // List bin: walk the singly linked list node by node, O(n).
        for (; node != null; node = node.next) {
            K nodeKey;
            if (node.hash == hash &&
                ((nodeKey = node.key) == key || (key != null && key.equals(nodeKey)))) {
                return node;
            }
        }
        return null;
    }

也就是说在冲突较多时，可以使用红黑树提高查询效率。

小方好方

发布了26 篇原创文章 · 获赞 8 · 访问量 1万+

私信关注