HashMap源码分析

转载

mb5fcdf2ea5f3c5 2021-08-23 22:08:00

HashMap

hash

-- JDK 1.8 中树化之后默认按照hashCode排序，如果对象实现了compareTo方法，则会按照对应的方法排序

// 将hashCode的高位与低位异或，从而使得高位可以影响哈希值，以减少哈希碰撞
static final int hash(Object key) {
   int h;
   return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}

getNode

final Node<K,V> getNode(int hash, Object key) {
   Node<K,V>[] tab; Node<K,V> first, e; int n; K k;
   if ((tab = table) != null && (n = tab.length) > 0 &&
          (first = tab[(n - 1) & hash]) != null) {
       // 第一个元素就是结果
       if (first.hash == hash && // always check first node
              ((k = first.key) == key || (key != null && key.equals(k))))
           return first;
       // 第一个元素不是结果
       if ((e = first.next) != null) {

           // 如果是树 使用树查找
           if (first instanceof TreeNode)
               return ((TreeNode<K,V>)first).getTreeNode(hash, key);
           // 否则按照链表往下查找
           do {
               if (e.hash == hash &&
                      ((k = e.key) == key || (key != null && key.equals(k))))
                   return e;
          } while ((e = e.next) != null);
      }
  }
   return null;
}

put

---- JDK1.7中采用头插法，JDK1.8中采用尾插法；

头插法在多线程环境下可能会导致循环链表，从而程序死循环。

---- 第一次插入节点需要初始化列表

---- 如果插入位置为空，直接插入新节点；

|-- 否则判断节点key是否相同，相同则替换新值；

|-- 不相同，则根据节点类型插入数据；

|-- 如果是树节点，则将节点插入树，如果是链表，则需要先遍历链表查找是否存在相同的key，不存在则在尾部插入新节点；

---- 最后判断长度是否超过8，哈希桶数组是否达到64，超过则进行树化。

// 插入值
final V putVal(int hash, K key, V value, boolean onlyIfAbsent,
              boolean evict) {
   Node<K,V>[] tab; Node<K,V> p; int n, i;

   // 如果是第一次插入值，则需要初始化链表
   if ((tab = table) == null || (n = tab.length) == 0)
       n = (tab = resize()).length;

   // 如果hash位置为空，则直接存入新的节点
   if ((p = tab[i = (n - 1) & hash]) == null)
       tab[i] = newNode(hash, key, value, null);
   else {
       Node<K,V> e; K k;
       // 新值的hash和key与数组中的相等，则在后面的代码中替换值即可
       if (p.hash == hash &&
              ((k = p.key) == key || (key != null && key.equals(k))))
           e = p;
       // 如果p是树，则插入p
       else if (p instanceof TreeNode)
           e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value);
       // 否则需要遍历链表
       else {
           for (int binCount = 0; ; ++binCount) {
               // 尾插
               if ((e = p.next) == null) {
                   p.next = newNode(hash, key, value, null);
                   if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st
                       treeifyBin(tab, hash);
                   break;
              }
               // 找到key相等的，则替换值
               if (e.hash == hash &&
                      ((k = e.key) == key || (key != null && key.equals(k))))
                   break;
               p = e;
          }
      }
       // 替换值
       if (e != null) { // existing mapping for key
           V oldValue = e.value;
           if (!onlyIfAbsent || oldValue == null)
               e.value = value;
           afterNodeAccess(e);
           return oldValue;
      }
  }
   ++modCount;
   if (++size > threshold)
       resize();
   afterNodeInsertion(evict);
   return null;
}

resize

1 // 容量扩大
 2 final Node<K,V>[] resize() {
 3    Node<K,V>[] oldTab = table;
 4 
 5    // 计算新的容量和阈值
 6    int oldCap = (oldTab == null) ? 0 : oldTab.length;
 7    int oldThr = threshold;
 8    int newCap, newThr = 0;
 9    if (oldCap > 0) {
10        // 容量已经达到最大
11        if (oldCap >= MAXIMUM_CAPACITY) {
12            threshold = Integer.MAX_VALUE;
13            return oldTab;
14       }
15        else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY &&
16                oldCap >= DEFAULT_INITIAL_CAPACITY)
17            newThr = oldThr << 1; // double threshold
18   }
19 
20    else if (oldThr > 0) // initial capacity was placed in threshold
21        newCap = oldThr;
22 
23    // new HashMap<>() 未指定长度
24    else {               // zero initial threshold signifies using defaults
25        newCap = DEFAULT_INITIAL_CAPACITY;
26        newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY);
27   }
28    if (newThr == 0) {
29        float ft = (float)newCap * loadFactor;
30        newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ?
31               (int)ft : Integer.MAX_VALUE);
32   }
33    threshold = newThr;
34 
35    // 进行扩容和rehash
36    @SuppressWarnings({"rawtypes","unchecked"})
37    Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap];
38    table = newTab;
39    if (oldTab != null) {
40        for (int j = 0; j < oldCap; ++j) {
41            Node<K,V> e;
42            if ((e = oldTab[j]) != null) {
43                oldTab[j] = null;
44                // e是链表节点且只有e一个节点
45                if (e.next == null)
46                    newTab[e.hash & (newCap - 1)] = e;
47                // e是树节点
48                else if (e instanceof TreeNode)
49                   ((TreeNode<K,V>)e).split(this, newTab, j, oldCap);
50                // e是链表且有多个节点
51                else { // preserve order
52                    // lo为rehash后与原位置相同的链表，hi为位置不同的链表
53                    Node<K,V> loHead = null, loTail = null;
54                    Node<K,V> hiHead = null, hiTail = null;
55                    Node<K,V> next;
56                    do {
57                        next = e.next;
58                        // 直接hash计算位置时为 e.hash & (oldCap-1), 即oldCap=16时，oldCap-1=1111b
59                        // 假设原容量为16，即oldCap=16 = 10000b
60                        // e.hash低5位为01010，原始位置为 e.hash & 10000b = 00000b
61                        // e.hash第5位可能为0或1，则新的位置为 e.hash & 10000b = (0/1)0000b
62                        if ((e.hash & oldCap) == 0) {
63                            if (loTail == null)
64                                loHead = e;
65                            else
66                                loTail.next = e;
67                            loTail = e;
68                       }
69                        else {
70                            if (hiTail == null)
71                                hiHead = e;
72                            else
73                                hiTail.next = e;
74                            hiTail = e;
75                       }
76                   } while ((e = next) != null);
77 
78                    // 低链表位置不变
79                    if (loTail != null) {
80                        loTail.next = null;
81                        newTab[j] = loHead;
82                   }
83                    // 高链表放到新的位置 即 j + oldCap
84                    if (hiTail != null) {
85                        hiTail.next = null;
86                        newTab[j + oldCap] = hiHead;
87                   }
88               }
89           }
90       }
91   }
92    return newTab;
93 }
94

remove

1 final Node<K,V> removeNode(int hash, Object key, Object value,
 2                           boolean matchValue, boolean movable) {
 3    Node<K,V>[] tab; Node<K,V> p; int n, index;
 4 
 5    // 找到需要删除的节点
 6    if ((tab = table) != null && (n = tab.length) > 0 &&
 7           (p = tab[index = (n - 1) & hash]) != null) {
 8        Node<K,V> node = null, e; K k; V v;
 9 
10        // 情况1，hash列表中当前位置仅一个节点
11        if (p.hash == hash &&
12               ((k = p.key) == key || (key != null && key.equals(k))))
13            node = p;
14        else if ((e = p.next) != null) {
15 
16            // 情况2，hash列表中当前位置为一棵树
17            if (p instanceof TreeNode)
18                node = ((TreeNode<K,V>)p).getTreeNode(hash, key);
19 
20            // 情况3，当前位置为一个链表
21            else {
22                do {
23                    if (e.hash == hash &&
24                           ((k = e.key) == key ||
25                                   (key != null && key.equals(k)))) {
26                        node = e;
27                        break;
28                   }
29                    p = e;
30               } while ((e = e.next) != null);
31           }
32       }
33 
34        // 进行节点的删除
35        if (node != null && (!matchValue || (v = node.value) == value ||
36               (value != null && value.equals(v)))) {
37            if (node instanceof TreeNode)
38               ((TreeNode<K,V>)node).removeTreeNode(this, tab, movable);
39            else if (node == p)
40                tab[index] = node.next;
41            else
42                p.next = node.next;
43            ++modCount;
44            --size;
45            afterNodeRemoval(node);
46            return node;
47       }
48   }
49    return null;
50 }
51

树的退化：如果、、或者为空，则将树退化为链表：

1 final void removeTreeNode(HashMap<K,V> map, Node<K,V>[] tab,
 2                                  boolean movable){
 3    // ......
 4    if (root == null
 5        || (movable
 6            && (root.right == null
 7                || (rl = root.left) == null
 8                || rl.left == null))) {
 9        tab[index] = first.untreeify(map);  // too small
10        return;
11   }
12    //......
13 }

split

将红黑树进行rehash和拆分

1 /**
 2 * Splits nodes in a tree bin into lower and upper tree bins,
 3 * or untreeifies if now too small. Called only from resize;
 4 * see above discussion about split bits and indices.
 5 */
 6 final void split(HashMap<K,V> map, Node<K,V>[] tab, int index, int bit) {
 7    TreeNode<K,V> b = this;
 8    // Relink into lo and hi lists, preserving order
 9    TreeNode<K,V> loHead = null, loTail = null;
10    TreeNode<K,V> hiHead = null, hiTail = null;
11    int lc = 0, hc = 0;
12 
13    // TreeNode 继承了 LinkedHashMap.Entry, 后者又继承了 HashMap.Node
14    // 所以TreeNode 也存在next域，向红黑树插入TreeNode时会将上一个插入结点的next域指向当前插入的TreeNode
15    
16    // 将树拆分为两个链表
17    for (TreeNode<K,V> e = b, next; e != null; e = next) {
18        next = (TreeNode<K,V>)e.next;
19        e.next = null;
20        if ((e.hash & bit) == 0) {   // bit = oldCap ，在resize()中传入
21            if ((e.prev = loTail) == null)
22                loHead = e;
23            else
24                loTail.next = e;
25            loTail = e;
26            ++lc;
27       }
28        else {
29            if ((e.prev = hiTail) == null)
30                hiHead = e;
31            else
32                hiTail.next = e;
33            hiTail = e;
34            ++hc;
35       }
36   }
37 
38    if (loHead != null) {
39        // 如果小于退化阈值，则将树退化为链表
40        if (lc <= UNTREEIFY_THRESHOLD)
41            tab[index] = loHead.untreeify(map);
42        else {
43        // 否则保持红黑树状态
44            tab[index] = loHead;
45            // 如果hiHead不为空，则说明有部分节点在hiHead处，需要对loHead重新树化；
46            // 如果hiHead为空，则说明所有节点在hiHead这边，树的形态没有得到破环，不需要重新树化。
47            if (hiHead != null) // (else is already treeified)
48                loHead.treeify(tab);
49       }
50   }
51    if (hiHead != null) {
52        if (hc <= UNTREEIFY_THRESHOLD)
53            tab[index + bit] = hiHead.untreeify(map);
54        else {
55            tab[index + bit] = hiHead;
56            if (loHead != null)
57                hiHead.treeify(tab);
58       }
59   }
60 }

QA

为什么负载因子是0.75？

选择0.75作为默认的加载因子，完全是时间和空间成本上寻求的一种折衷选择。

当负载因子是1.0的时候，也就意味着，只有当数组的8个值（这个图表示了8个）全部填充了，才会发生扩容。这就带来了很大的问题，因为Hash冲突时避免不了的。当负载因子是1.0的时候，意味着会出现大量的Hash的冲突，底层的红黑树变得异常复杂。对于查询效率极其不利。这种情况就是牺牲了时间来保证空间的利用率。

当负载因子太小时，虽然时间效率提升了，但是空间利用率降低了。

在理想情况下，使用随机哈希码，在扩容阈值（加载因子）为0.75的情况下，节点出现在频率在Hash桶（表）中遵循参数平均为0.5的泊松分布。计算结果如上述的列表所示，当一个bin中的链表长度达到8个元素的时候，概率为0.00000006，几乎是一个不可能事件。

在维基百科来描述加载因子：

对于开放定址法，加载因子是特别重要因素，应严格限制在0.7-0.8以下。超过0.8，查表时的CPU缓存不命中（cache missing）按照指数曲线上升。因此，一些采用开放定址法的hash库，如Java的系统库限制了加载因子为0.75，超过此值将resize散列表。

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。