PostgreSQL数据库数据结构——操作动态哈希表

原创

mb62de8abf75c00 2022-07-26 20:18:32 ©著作权

©著作权归作者所有：来自51CTO博客作者mb62de8abf75c00的原创作品，请联系作者获取转载授权，否则将追究法律责任

PostgreSQL数据库数据结构——操作动态哈希表_#define

hash_search函数在table中查找key，并执行操作。hash_search_with_hash_value函数输入的key已经计算过其哈希值。
action的值为：查找key（HASH_FIND）、查找key，如果没有就创建（HASH_ENTER）、查找key，如果没有就创建，如果out of memory返回NULL（HASH_ENTER_NULL）、查找key，如果有就移除（HASH_REMOVE）。

/*
 * hash_search -- look up keyPtr in hashp and perform the requested action.
 *
 * The key is hashed with the table's own hash function over hashp->keysize
 * bytes, and all remaining work is delegated to
 * hash_search_with_hash_value().  The return value and *foundPtr are exactly
 * what that function produces.
 */
void *hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr) {
  uint32 hashvalue = hashp->hash(keyPtr, hashp->keysize);

  return hash_search_with_hash_value(hashp, keyPtr, hashvalue, action, foundPtr);
}

返回值为指向元素（found/entered/removed）的指针，或者是NULL。如果foundPtr不为NULL，则找到条目时将*foundPtr置为true，否则置为false。

hash_search_with_hash_value函数首先判别如果动作为插入，先检查是否需要扩容，调用expand_table扩容。

/*
 * hash_search_with_hash_value -- same as hash_search, but the caller supplies
 * the already-computed hash value of the key.
 *
 * hashp     : table to operate on
 * keyPtr    : key to look up
 * hashvalue : hash value of the key
 * action    : HASH_FIND, HASH_ENTER, HASH_ENTER_NULL or HASH_REMOVE
 * foundPtr  : if not NULL, *foundPtr is set to whether the key was already
 *             present in the table
 *
 * Returns a pointer to the key of the found/entered/removed element, or NULL.
 */
void *hash_search_with_hash_value(HTAB *hashp,const void *keyPtr,uint32 hashvalue,HASHACTION action,bool *foundPtr) {
  HASHHDR    *hctl = hashp->hctl;
  /* index into hctl->freeList[] used for the entry counter and free elements */
  int      freelist_idx = FREELIST_IDX(hctl, hashvalue);
  Size    keysize;
  uint32    bucket;
  long    segment_num;
  long    segment_ndx;
  HASHSEGMENT segp;
  HASHBUCKET  currBucket;
  HASHBUCKET *prevBucketPtr;
  HashCompareFunc match;
  /* Only the insert actions may grow the table, so check for expansion first. */
  if (action == HASH_ENTER || action == HASH_ENTER_NULL) {
    /* Can't split if running in partitioned mode, nor if frozen, nor if table is the subject of any active hash_seq_search scans.  Strange order of these tests is to try to check cheaper conditions first. */
    /*
     * Expand when the entries-per-bucket ratio has reached ffactor.
     * NOTE(review): only freeList[0].nentries is consulted; presumably that
     * is the sole counter in use when the table is not partitioned -- confirm.
     */
    if (!IS_PARTITIONED(hctl) && !hashp->frozen && hctl->freeList[0].nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor && !has_seq_scans(hashp))
      (void) expand_table(hashp);
  }

先通过calc_bucket函数计算出bucket桶的序号，通过bucket >> hashp->sshift计算出该桶位于目录中的第几个段，再通过MOD(bucket, hashp->ssize)计算出该桶在段内的序号。然后用while循环沿着该桶的冲突链逐个比较，查找哈希值与给定哈希值相同、并且键也相等的元素。

PostgreSQL数据库数据结构——操作动态哈希表_while循环_02

/* Do the initial lookup */
  /* Map the hash value to a bucket, then split the bucket number into a directory slot (segment) and an index within that segment. */
  bucket = calc_bucket(hctl, hashvalue);
  segment_num = bucket >> hashp->sshift;
  segment_ndx = MOD(bucket, hashp->ssize);
  segp = hashp->dir[segment_num];
  /* A missing segment is reported through hash_corrupted(). */
  if (segp == NULL)
    hash_corrupted(hashp);
  /* prevBucketPtr always addresses the link that points at currBucket, so the chain can later be edited in place. */
  prevBucketPtr = &segp[segment_ndx];
  currBucket = *prevBucketPtr;

  /* Follow collision chain looking for matching key */
  match = hashp->match;    /* save one fetch in inner loop */
  keysize = hashp->keysize;  /* ditto */

  while (currBucket != NULL){
    /* Compare the stored hash value first; the key comparison runs only when the hash values are equal. */
    if (currBucket->hashvalue == hashvalue && match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
      break;
    prevBucketPtr = &(currBucket->link);
    currBucket = *prevBucketPtr;
  }
  /* On exit currBucket is the matching element, or NULL with prevBucketPtr addressing the chain's terminating link. */
  if (foundPtr)  *foundPtr = (bool) (currBucket != NULL);

查找

借用上面的代码完成初始查找之后，如果找到了对应的HASHELEMENT，就直接返回指向该元素键的指针（ELEMENTKEY(currBucket)），否则返回NULL。

/* OK, now what? */
  /* Dispatch on the requested action, using the result of the chain walk (currBucket / prevBucketPtr). */
  switch (action) {
    case HASH_FIND:
      /* Pure lookup: return the key pointer of the match, or NULL if there is none. */
      if (currBucket != NULL) return (void *) ELEMENTKEY(currBucket);
      return NULL;
      
#define

删除

到这里已经查找到要删除的元素了，先需要将对应空闲链表（freeList[freelist_idx]）的条目数量计数器nentries减一，然后从哈希桶链中剔除该HASHELEMENT元素，并将其加入到空闲链表的表头。

/* HASH_REMOVE: unlink the matching element from its bucket chain and return it to the freelist; returns NULL if there was no match. */
case HASH_REMOVE:
      if (currBucket != NULL) {
        /* if partitioned, must lock to touch nentries and freeList */
        if (IS_PARTITIONED(hctl)) SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
        /* delete the record from the appropriate nentries counter. */
        Assert(hctl->freeList[freelist_idx].nentries > 0);
        hctl->freeList[freelist_idx].nentries--;
        /* remove record from hash bucket's chain. */
        *prevBucketPtr = currBucket->link;
        /* add the record to the appropriate freelist. */
        /* The element is pushed onto the head of the freelist. */
        currBucket->link = hctl->freeList[freelist_idx].freeList;
        hctl->freeList[freelist_idx].freeList = currBucket;
        if (IS_PARTITIONED(hctl)) SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
        /* better hope the caller is synchronizing access to this element, because someone else is going to reuse it the next time something is added to the table */
        return (void *) ELEMENTKEY(currBucket);
      }
      return NULL;

插入

先查询是否已有这样的条目，有就直接返回；没有则进行创建：通过get_hash_entry函数先从空闲链表中申请一个元素，然后将返回的HASHELEMENT指针插入相应桶的链尾。

/* HASH_ENTER_NULL / HASH_ENTER: return the existing element if the key is present, otherwise create one. They differ only in out-of-memory handling: ENTER_NULL returns NULL, ENTER raises an error. */
case HASH_ENTER_NULL:
      /* ENTER_NULL does not work with palloc-based allocator */
      Assert(hashp->alloc != DynaHashAlloc);
      /* FALL THRU */
    case HASH_ENTER:
      /* Return existing element if found, else create one */
      if (currBucket != NULL) return (void *) ELEMENTKEY(currBucket);
      /* disallow inserts if frozen */
      if (hashp->frozen) elog(ERROR, "cannot insert into frozen hashtable \"%s\"", hashp->tabname);
      /* Obtain an element for this freelist index; a NULL result is treated as out of memory. */
      currBucket = get_hash_entry(hashp, freelist_idx);
      if (currBucket == NULL) {
        /* out of memory */
        if (action == HASH_ENTER_NULL) return NULL;
        /* report a generic message */
        if (hashp->isshared) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory")));
        else ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory")));
      }
      /* link into hashbucket chain */
      /* The lookup found no match, so prevBucketPtr addresses the chain's terminating link and the new element becomes the tail. */
      *prevBucketPtr = currBucket;
      currBucket->link = NULL;
      /* copy key into record */
      currBucket->hashvalue = hashvalue;
      hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
      /* Caller is expected to fill the data field on return.  DO NOT insert any code that could possibly throw error here, as doing so would leave the table entry incomplete and hence corrupt the caller's data structure. */
      return (void *) ELEMENTKEY(currBucket);
  }
  /* Reached only when action matches none of the cases handled by the switch. */
  elog(ERROR, "unrecognized hash action code: %d", (int) action);
  return NULL;        /* keep compiler quiet */
}