ios 哈希表原理哈希表怎么实现的

转载

mob64ca13f40f3d 2024-01-13 19:53:38

文章标签 ios 哈希表原理 Boo #include hash表 文章分类 iOS 移动开发

散列表（Hash table，也叫哈希表），是根据关键码值(Key value)而直接进行访问的数据结构。也就是说，它通过把关键码值映射到表中一个位置来访问记录，以加快查找的速度。这个映射函数叫做散列函数，存放记录的数组叫做散列表。

给定表M，存在函数f(key)，对任意给定的关键字值key，代入函数后若能得到包含该关键字的记录在表中的地址，则称表M为哈希(Hash)表，函数f(key)为哈希(Hash)函数。
常见的一种哈希表：

本章中的哈希表由数组和双端链表两种数据结构结合构成。对于哈希算法来说，其核心在于哈希函数的设计：好的哈希函数能使所有键值均匀地散列到各个桶中，从而提高查找速度。

hash表的具体实现：
我们在hash.h中定义了Hash结构体和哈希的一些接口声明，其具体实现如下：

<pre name="code" class="plain"><span style="font-size:18px;"><strong>1 #ifndef _HASH_H_
  2 #define _HASH_H_
  3 
  4 #include "dlist.h"
  5 
  6 typedef int (*Hash_func)(const void *key);    // maps a key to an int; callers reduce it modulo the bucket count
  7 
  8 typedef struct Hash{
  9     Dlist      **table;    // 1. storage of the hash table: one Dlist pointer per bucket
 10     int   bucket_count;    // 2. number of buckets in the table
 11     int  element_count;    // 3. number of elements currently stored
 12 
 13     void (*hash_free)(const void *ptr);    // set to NULL by init_hash; not called in the code shown here
 14     Boolean (*hash_match)(const void *value1,
 15                      const void *value2);    // optional equality test; when NULL, search/remove compare pointers
 16     int (*hash_func)(const void *key);    // hashes an element so elements spread evenly over the buckets
 17 }Hash;
 18 
 19 // hash table interface
 20 Hash    *init_hash(int buck_count, Hash_func hash_func);    // create a hash table
 21 void    destory_hash(Hash **hash);    // destroy a hash table and set *hash to NULL
 22 Boolean hash_insert(Hash *hash,const void *value);    // insert a value
 23 Boolean hash_serach(Hash *hash,const void *value);    // look a value up
 24 Boolean hash_remove(Hash *hash,const void *value);    // remove a value
 25 void    print_hash_table(const Hash *hash);    // print the hash table bucket by bucket
 26 int     get_element_count(const Hash *hash);    // number of stored elements, or -1 for a NULL table
 27 
 28 
 29 int     int_hash_func(const void *key);    // hash function for int keys: returns the int that key points to
 30 #endif</strong></span>

hash.c的具体实现如下：

<pre name="code" class="plain"><span style="font-size:18px;"><strong>#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include "hash.h"
// Create an empty hash table.
//
// buck_count: number of buckets; must be greater than zero.
// hash_func:  maps a stored value to an int; must not be NULL.
//
// Returns a newly allocated Hash whose hash_free and hash_match callbacks are
// NULL, or NULL when an argument is invalid. Release it with destory_hash().
Hash    *init_hash(int buck_count, Hash_func hash_func)
{
    Hash *hash = NULL;
    size_t table_bytes = 0;

    // A non-positive bucket count would make every later
    // "% bucket_count" invalid (division by zero for 0), and a NULL
    // hash function would be called on the first insert.
    if(buck_count <= 0 || hash_func == NULL){
        return NULL;
    }

    hash = (Hash *)Malloc(sizeof(Hash));
    hash->bucket_count = buck_count;
    hash->element_count = 0;
    hash->hash_free = NULL;
    hash->hash_match = NULL;
    hash->hash_func = hash_func;

    // Only the array of bucket pointers is allocated here; each bucket
    // list is created lazily the first time that bucket is used.
    table_bytes = sizeof(Dlist *) * (size_t)buck_count;
    hash->table = (Dlist **)Malloc(table_bytes);
    bzero(hash->table, table_bytes);

    return hash;
}
// Destroy a hash table and reset the caller's pointer to NULL.
//
// Teardown order: every bucket list first, then the bucket array, then the
// Hash control block itself. A NULL hash or a NULL *hash is ignored.
void    destory_hash(Hash **hash)
{
    Hash *self = NULL;
    int slot = 0;

    if(hash == NULL || *hash == NULL){
        return;
    }
    self = *hash;

    // destroy_dlist ignores buckets that were never created (NULL entries).
    for(slot = 0; slot < self->bucket_count; ++slot){
        destroy_dlist(&self->table[slot]);
    }
    free(self->table);
    free(self);
    *hash = NULL;
}
// Insert value into the hash table unless it is already present.
//
// The table stores the pointer itself; nothing is copied. The element is
// pushed at the front of its bucket list.
//
// Returns TRUE when the value was inserted. Returns FALSE for a NULL hash or
// value, a table without a hash function or with a non-positive bucket count,
// a value that hash_serach() already finds, or a failed list insertion.
Boolean hash_insert(Hash *hash,const void *value)
{
    int bucket = 0;

    if(hash == NULL || value == NULL
       || hash->hash_func == NULL || hash->bucket_count <= 0){
        return FALSE;
    }
    if(hash_serach(hash,value) == TRUE){
        return FALSE;
    }

    // In C the sign of "%" follows the dividend, so a negative hash would
    // yield a negative index; fold it back into [0, bucket_count).
    bucket = hash->hash_func(value) % hash->bucket_count;
    if(bucket < 0){
        bucket += hash->bucket_count;
    }

    // Create the bucket here instead of depending on the lookup above
    // having created it as a side effect.
    if(hash->table[bucket] == NULL){
        hash->table[bucket] = init_dlist();
    }

    // Count the element only once it is really stored.
    if(push_front(hash->table[bucket],(void *)value) == FALSE){
        return FALSE;
    }
    hash->element_count++;
    return TRUE;
}
// Report whether value is already stored in the hash table.
//
// The bucket is hash_func(value) % bucket_count, folded into
// [0, bucket_count) so that a negative hash cannot index before the table.
// Elements are compared with hash_match when it is set, otherwise by pointer
// identity.
//
// Side effect kept from the original design: a bucket that does not exist yet
// is created here, so the table can change even though this is a lookup.
//
// Returns TRUE when a matching element is found; FALSE otherwise, including
// for a NULL hash or value, a missing hash function or a non-positive bucket
// count.
Boolean hash_serach(Hash *hash,const void *value)
{
    int bucket = 0;
    Dlist_node *p_node = NULL;
    Boolean same = FALSE;

    if(hash == NULL || value == NULL
       || hash->hash_func == NULL || hash->bucket_count <= 0){
        return FALSE;
    }

    bucket = hash->hash_func(value) % hash->bucket_count;
    if(bucket < 0){
        bucket += hash->bucket_count;
    }

    // A bucket that was never used cannot contain the value; create it so
    // that it is ready for a following insertion.
    if(hash->table[bucket] == NULL){
        hash->table[bucket] = init_dlist();
        return FALSE;
    }

    for(p_node = hash->table[bucket]->head; p_node != NULL;
        p_node = p_node->next){
        if(hash->hash_match != NULL){
            same = hash->hash_match(p_node->data,value);
        }else{
            same = (p_node->data == value);
        }
        if(same){
            return TRUE;
        }
    }
    return FALSE;
}
// Remove the first element matching value from the hash table.
//
// Elements are compared with hash_match when it is set, otherwise by pointer
// identity.
//
// Returns TRUE when an element was removed; FALSE for a NULL hash or value, an
// empty table, a missing hash function, a non-positive bucket count, a bucket
// that was never created, or when no element matches.
Boolean hash_remove(Hash *hash,const void *value)
{
    int bucket = 0;
    Dlist *list = NULL;
    Dlist_node *p_node = NULL;
    Boolean same = FALSE;

    if(hash == NULL || value == NULL || hash->element_count == 0
       || hash->hash_func == NULL || hash->bucket_count <= 0){
        return FALSE;
    }

    // Locate the bucket; fold a negative remainder back into range.
    bucket = hash->hash_func(value) % hash->bucket_count;
    if(bucket < 0){
        bucket += hash->bucket_count;
    }

    // Buckets are created lazily, so this one may not exist yet; it then
    // cannot hold the value and must not be dereferenced.
    list = hash->table[bucket];
    if(list == NULL){
        return FALSE;
    }

    for(p_node = list->head; p_node != NULL; p_node = p_node->next){
        if(hash->hash_match != NULL){
            same = hash->hash_match(p_node->data,value);
        }else{
            same = (p_node->data == value);
        }
        if(same){
            // Return right away: p_node is freed by the removal.
            remove_dlist_node(list,p_node);
            hash->element_count--;
            return TRUE;
        }
    }
    return FALSE;
}
// Print every bucket of the hash table on its own line, starting at index 0.
//
// Each line starts with "bucket[i]:" followed by the bucket's elements as
// formatted by print_int. Nothing is printed for a NULL or empty table.
void    print_hash_table(const Hash *hash)
{
    int i = 0;

    if(hash == NULL || hash->element_count == 0){
        return;
    }

    for(i = 0; i < hash->bucket_count; ++i){
        printf("bucket[%d]:",i);
        if(hash->table[i] == NULL || hash->table[i]->count == 0){
            // print_dlist prints nothing at all for a missing or empty
            // list, so end the line here to keep one bucket per line.
            printf("\n");
        }else{
            print_dlist(hash->table[i],print_int);
        }
    }
}
// Number of elements currently stored in the table, or -1 when hash is NULL.
int     get_element_count(const Hash *hash)
{
    return (hash != NULL) ? hash->element_count : -1;
}
// Hash function for int keys: the hash is the int that key points to.
// key must point to a valid int; it is dereferenced without a NULL check.
int int_hash_func(const void *key)
{
    const int *as_int = key;

    return *as_int;
}

</strong></span>

在哈希表中的每个槽中都记录了双端链表的控制信息，dlist.c和dlist.h的具体实现如下：

<span style="font-size:18px;"><strong>dlist.h :

#ifndef _DLIST_H_
#define _DLIST_H_

#include "tools.h"

// Named values compared against a list's count field in dlist.c.
enum Count{
    ZERO,    // 0: the list is empty
    ONE,     // 1: the list holds exactly one node
};

// Callback used by print_dlist to print the data pointer of one node.
typedef  void (*Print_func)(void *value);

// List node type
typedef struct Dlist_node{
    struct Dlist_node *prev;    // predecessor
    struct Dlist_node *next;    // successor
    void              *data;    // accepts a pointer of any type (keeps the list generic)
}Dlist_node;


// Control block of a doubly linked list.
typedef struct Dlist{
    struct Dlist_node *head;    // head node
    struct Dlist_node *tail;    // tail node
    int               count;    // number of nodes
    
    // release strategy for the content that data points to
    void (*free)(void *ptr);
    // equality strategy for the content that data points to
    Boolean (*match)(void *value1, void *value2);
    // copy strategy for the content that data points to
    void *(*copy_node)(void *value);
}Dlist;

// Interface of the generic list
Dlist   *init_dlist(void)           ;    // create a doubly linked list
void    destroy_dlist(Dlist **dlist);   // destroy a doubly linked list
Boolean push_front(Dlist *dlist, void *value);    // insert at the head
Boolean push_back(Dlist *dlist, void *value) ;    // insert at the tail
Boolean pop_front(Dlist *dlist);    // delete the head node
Boolean pop_back(Dlist *dlist) ;   // delete the tail node

Dlist_node *find_node(Dlist *dlist, void *value);   // find a node
Boolean insert_prev(Dlist *dlist, Dlist_node *node, 
            void *value);    // insert before the given node
Boolean insert_next(Dlist *dlist, Dlist_node *node,
            void *value);    // insert after the given node
Boolean remove_dlist_node(Dlist *dlist, Dlist_node *node);   // delete the given node
void    print_dlist(Dlist *dlist, Print_func print)      ;   // print the list
Boolean get_front(Dlist *dlist, void **value);   // get the data of the head node
Boolean get_tail(Dlist *dlist, void **value) ;    // get the data of the tail node
int     get_dlist_count(Dlist *dlist);   // get the number of nodes


#endif
</strong></span>

dlist.c:

<span style="font-size:18px;"><strong>#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include "dlist.h"
#include "tools.h"

Dlist    *init_dlist(void)        //双端链表的初始化
{
    Dlist *dlist = (Dlist *)Malloc(sizeof(Dlist));
    bzero(dlist,sizeof(Dlist));

    return dlist;
}
// Destroy a list and reset the caller's pointer to NULL.
//
// All nodes are removed from the head first, then the control block is
// freed. A NULL dlist or a NULL *dlist is ignored.
void     destroy_dlist(Dlist **dlist)
{
    Dlist *list = (dlist != NULL) ? *dlist : NULL;

    if(list == NULL){
        return;
    }

    // 1. Drop the nodes one by one from the front.
    while(list->count != ZERO){
        pop_front(list);
    }

    // 2. Release the control block.
    free(list);
    *dlist = NULL;
}
// Insert value at the head of the list.
//
// The list stores the pointer itself; nothing is copied.
// Returns TRUE on success, FALSE when dlist or value is NULL.
Boolean  push_front(Dlist *dlist,void *value)
{
    Dlist_node * p_node = NULL;
    if(dlist == NULL || value == NULL){
        return FALSE;
    }
    p_node = (Dlist_node *)Malloc(sizeof(Dlist_node));
    p_node->data = value;
    // Malloc does not clear memory: both links must be set explicitly,
    // otherwise a traversal would follow indeterminate pointers.
    p_node->prev = NULL;
    p_node->next = NULL;

    if(dlist->count == ZERO){
        dlist->head = dlist->tail = p_node;
    }else{
        p_node->next = dlist->head;
        dlist->head->prev = p_node;
        dlist->head = p_node;
    }
    dlist->count++;
    return TRUE;
}
// Insert value at the tail of the list.
//
// The list stores the pointer itself; nothing is copied.
// Returns TRUE on success, FALSE when dlist or value is NULL.
Boolean  push_back(Dlist *dlist,void *value)
{
    Dlist_node *p_node = NULL;
    if(dlist == NULL || value == NULL){
        return FALSE;
    }
    p_node = (Dlist_node *)Malloc(sizeof(Dlist_node));
    p_node->data = value;
    // Malloc does not clear memory: prev must be NULL as well, otherwise
    // the first node of a list carries an indeterminate back link.
    p_node->prev = NULL;
    p_node->next = NULL;

    if(dlist->count == ZERO){
        dlist->head = dlist->tail = p_node;
    }else{
        dlist->tail->next = p_node;
        p_node->prev = dlist->tail;
        dlist->tail = p_node;
    }
    dlist->count++;
    return TRUE;
}
// Delete the head node of the list.
//
// When the list has a free callback it is applied to the node's data before
// the node itself is freed.
// Returns TRUE when a node was removed, FALSE for a NULL or empty list.
Boolean  pop_front(Dlist *dlist)
{
    Dlist_node *victim = NULL;

    if(dlist == NULL || dlist->count == ZERO){
        return FALSE;
    }

    victim = dlist->head;
    if(dlist->count != ONE){
        // Advance the head and cut its back link to the old head.
        dlist->head = victim->next;
        dlist->head->prev = NULL;
    }else{
        // Removing the only node leaves the list empty.
        dlist->head = NULL;
        dlist->tail = NULL;
    }

    // Only a list that has a free callback releases the data itself.
    if(dlist->free){
        dlist->free(victim->data);
    }
    free(victim);
    dlist->count--;
    return TRUE;
}
// Delete the tail node of the list.
//
// When the list has a free callback it is applied to the node's data before
// the node itself is freed.
// Returns TRUE when a node was removed, FALSE for a NULL or empty list.
Boolean  pop_back(Dlist *dlist)
{
    Dlist_node *p_node = NULL;
    if(dlist == NULL || dlist->count == ZERO){
        return FALSE;
    }
    p_node = dlist->tail;
    if(dlist->count == ONE){
        dlist->head = dlist->tail = NULL;
    }else{
        dlist->tail = p_node->prev;
        dlist->tail->next = NULL;
    }

    // The callback releases the payload, not the node: handing it the node
    // would leave the data unreleased and free the node twice.
    if(dlist->free){
        dlist->free(p_node->data);
    }
    free(p_node);
    dlist->count--;
    return TRUE;
}

// Find the first node whose data equals value.
//
// Nodes are compared with the list's match callback when it is set (a true
// result means "equal", the same convention the hash table uses for
// hash_match), otherwise by pointer identity.
//
// Returns the matching node, or NULL when dlist or value is NULL, the list is
// empty, or no node matches.
Dlist_node *find_node(Dlist *dlist,void *value)
{
    Dlist_node *p = NULL;
    if(dlist == NULL || dlist->count == ZERO
       || value == NULL){
        return NULL;
    }
    for(p = dlist->head; p != NULL; p = p->next){
        if(dlist->match != NULL ? dlist->match(p->data,value)
                                : p->data == value){
            return p;
        }
    }
    return NULL;
}
// Insert value immediately before node.
//
// Returns TRUE on success, FALSE when dlist, node or value is NULL.
Boolean insert_prev(Dlist *dlist,Dlist_node *node,
                     void *value)
{
    Dlist_node *fresh = NULL;
    Dlist_node *before = NULL;

    if(!dlist || !node || !value){
        return FALSE;
    }

    fresh = (Dlist_node *)Malloc(sizeof(Dlist_node));
    before = node->prev;

    fresh->data = value;
    fresh->prev = before;
    fresh->next = node;

    if(before != NULL){
        before->next = fresh;
    }else{
        // node had no predecessor, so the new node becomes the head.
        dlist->head = fresh;
    }
    node->prev = fresh;

    dlist->count++;
    return TRUE;
}
// Insert value immediately after node.
//
// Returns TRUE on success, FALSE when dlist, node or value is NULL.
Boolean insert_next(Dlist *dlist,Dlist_node *node,
                     void *value)
{
    Dlist_node *fresh = NULL;
    Dlist_node *after = NULL;

    if(!dlist || !node || !value){
        return FALSE;
    }

    fresh = (Dlist_node *)Malloc(sizeof(Dlist_node));
    after = node->next;

    fresh->data = value;
    fresh->prev = node;
    fresh->next = after;

    if(after != NULL){
        after->prev = fresh;
    }else{
        // node had no successor, so the new node becomes the tail.
        dlist->tail = fresh;
    }
    node->next = fresh;

    dlist->count++;
    return TRUE;
}
// Delete the given node from the list.
//
// node must belong to dlist. When the list has a free callback it is applied
// to the node's data before the node itself is freed.
//
// Returns TRUE when the node was removed; FALSE when dlist or node is NULL,
// the list is empty, or node is neither an end of the list nor linked on
// both sides.
Boolean remove_dlist_node(Dlist *dlist,Dlist_node *node)
{
    if(dlist == NULL || dlist->count == ZERO || node == NULL){
        return FALSE;
    }

    // The two ends are handled by the pop functions; pass their result on so
    // that every path returns a value.
    if(node == dlist->tail){
        return pop_back(dlist);
    }
    if(node == dlist->head){
        return pop_front(dlist);
    }

    // An interior node always has both neighbours; anything else cannot be
    // unlinked safely.
    if(node->prev == NULL || node->next == NULL){
        return FALSE;
    }
    node->prev->next = node->next;
    node->next->prev = node->prev;
    if(dlist->free){
        dlist->free(node->data);
    }
    free(node);
    dlist->count--;
    return TRUE;
}
// Print every node of the list from head to tail, then a newline.
//
// print is called once per node with that node's data pointer. Nothing at
// all is printed when dlist or print is NULL or the list is empty.
void    print_dlist(Dlist *dlist,Print_func print)
{
    Dlist_node *cursor = NULL;

    if(dlist == NULL || print == NULL || dlist->count == 0){
        return;
    }

    cursor = dlist->head;
    while(cursor != NULL){
        print(cursor->data);
        cursor = cursor->next;
    }
    printf("\n");
}
// Fetch the data pointer of the head node.
//
// On success *value receives the head node's data and TRUE is returned.
// Returns FALSE, leaving *value untouched, when dlist or value is NULL or
// the list is empty.
Boolean get_front(Dlist *dlist,void **value)
{
    if(dlist == NULL || value == NULL || dlist->count == ZERO){
        return FALSE;
    }
    *value = dlist->head->data;
    return TRUE;
}
// Fetch the data pointer of the tail node.
//
// On success *value receives the tail node's data and TRUE is returned.
// Returns FALSE, leaving *value untouched, when dlist or value is NULL or
// the list is empty.
Boolean get_tail(Dlist *dlist,void **value)
{
    if(dlist == NULL || value == NULL || dlist->count == ZERO){
        return FALSE;
    }
    *value = dlist->tail->data;
    return TRUE;
}
// Number of nodes in the list, or -1 when dlist is NULL.
int     get_dlist_count(Dlist *dlist)
{
    return (dlist != NULL) ? dlist->count : -1;
}
</strong></span>

在dlist.c中包含tools.h ,该头文件为一个工具头文件，其中包括包裹函数Malloc和一些简单的函数
其具体实现如下：
tools.h :

<span style="font-size:18px;"><strong>#ifndef _TOOLS_H_
#define _TOOLS_H_


#include <stddef.h>    // size_t for the Malloc prototype, so this header can be included on its own

// Boolean values
#define TRUE   (1)
#define FALSE  (0)


// Boolean type used for the TRUE / FALSE results of the list and hash APIs
typedef unsigned char Boolean;

// Interface
// malloc wrapper: prints a message and exits instead of returning NULL
void *Malloc(size_t size);

// Print_func-compatible printer: value must point to an int
void print_int(void *value);
#endif
</strong></span>

tools.c :

<span style="font-size:18px;"><strong>#include <stdio.h>
#include <stdlib.h>
#include "tools.h"

// malloc wrapper that never returns NULL.
//
// size: number of bytes requested. A request for 0 bytes is served as a
//       1-byte allocation, because malloc(0) is allowed to return NULL and
//       that must not be reported as running out of memory.
//
// Returns a pointer to uninitialised memory that the caller releases with
// free(). On allocation failure a message is written to stderr and the
// process exits with status 1.
void *Malloc(size_t size)
{
    void *result = malloc(size != 0 ? size : 1);
    if(result == NULL){
        fprintf(stderr,"the memory is full!\n");
        exit(1);
    }
    return result;
}

// Print the int that value points to, right-aligned in a 5-character field.
// value must point to a valid int; it is dereferenced without a NULL check.
void print_int(void *value)
{
    const int number = *(int *)value;

    printf("%5d",number);
}

</strong></span>

测试程序：

#include <stdio.h>
#include <stdlib.h>
#include "hash.h"
#include "tools.h"

#define MAXSIZE (100)

// Demo: fill an array with MAXSIZE pseudo-random values in [0, 999], insert
// the address of every slot into a 10-bucket hash table and print the table.
int main(int argc,char **argv)
{
    int *numbers = (int *)Malloc(sizeof(int) * MAXSIZE);
    Hash *table = NULL;
    int idx = 0;

    // rand() is not seeded here.
    while(idx < MAXSIZE){
        numbers[idx] = rand() % 1000;
        idx++;
    }

    table = init_hash(10,int_hash_func);
    for(idx = 0; idx < MAXSIZE; idx++){
        hash_insert(table,numbers + idx);
    }

    print_hash_table(table);

    // The table only stores pointers into numbers.
    free(numbers);
    destory_hash(&table);
    return 0;
}

假设生成100个随机数，桶的个数为10，可看到其散列的结果:

ios 哈希表原理哈希表怎么实现的_Boo_02