散列表(Hash table,也叫哈希表),是根据关键码值(Key value)而直接进行访问的数据结构。也就是说,它通过把关键码值映射到表中一个位置来访问记录,以加快查找的速度。这个映射函数叫做散列函数,存放记录的数组叫做散列表。
给定表M,存在函数f(key),对任意给定的关键字值key,代入函数后若能得到包含该关键字的记录在表中的地址,则称表M为哈希(Hash)表,函数f(key)为哈希(Hash) 函数。
常见的一种哈希表:
本章中的哈希表由两种数据结构结合构成:数组和双端链表。对于哈希算法来说,其核心在于哈希函数的设计:好的哈希函数能使所有的键值均匀地散列到各个桶中,从而提高查找速度。
hash表的具体实现:
我们在hash.h中定义了Hash结构体和哈希的一些接口声明,其具体实现如下:
<pre name="code" class="plain"><span style="font-size:18px;"><strong>1 #ifndef _HASH_H_
2 #define _HASH_H_
3
4 #include "dlist.h"
5
6 typedef int (*Hash_func)(const void *key); // maps a key to an int; the table reduces it modulo bucket_count
7
8 typedef struct Hash{
9 Dlist **table; //1. bucket array: one Dlist pointer per bucket (NULL until the bucket is created)
10 int bucket_count; //2. number of buckets in the table
11 int element_count; //3. number of elements currently stored
12
13 void (*hash_free)(const void *ptr); // element release hook; init_hash sets it to NULL
14 Boolean (*hash_match)(const void *value1, // element equality hook; when NULL, search/remove
15 const void *value2); // compare the stored pointers themselves
16 int (*hash_func)(const void *key); // hash function used to spread elements over the buckets
17 }Hash;
18
19 // hash table interface
20 Hash *init_hash(int buck_count, Hash_func hash_func); // create a table with buck_count buckets
21 void destory_hash(Hash **hash); // destroy the table and set *hash to NULL
22 Boolean hash_insert(Hash *hash,const void *value); // insert a value (rejected if already present)
23 Boolean hash_serach(Hash *hash,const void *value); // look a value up
24 Boolean hash_remove(Hash *hash,const void *value); // remove a value
25 void print_hash_table(const Hash *hash); // print the table bucket by bucket
26 int get_element_count(const Hash *hash); // number of stored elements, -1 for a NULL table
27
28
29 int int_hash_func(const void *key); // hash function for int keys: returns the int that key points to
30 #endif</strong></span>
hash.c的具体实现如下:
<pre name="code" class="plain"><span style="font-size:18px;"><strong>#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include "hash.h"
/*
 * Create an empty hash table.
 *
 * buck_count: number of buckets; must be positive.
 * hash_func:  function mapping a key to an int; must not be NULL.
 *
 * Returns the new table, or NULL when an argument is invalid. The bucket
 * array is allocated up front but every slot starts out NULL; the list
 * behind a bucket is only created the first time that bucket is used.
 * hash_free and hash_match start out NULL and may be set by the caller.
 */
Hash *init_hash(int buck_count, Hash_func hash_func)
{
    Hash *hash = NULL;
    int i = 0;

    /* A non-positive bucket count would turn every later
     * "% bucket_count" into a division by zero or a bad index, and a
     * NULL hash function cannot be called. */
    if (buck_count <= 0 || hash_func == NULL) {
        return NULL;
    }

    hash = (Hash *)Malloc(sizeof(Hash));
    hash->bucket_count = buck_count;
    hash->element_count = 0;
    hash->hash_free = NULL;
    hash->hash_match = NULL;
    hash->hash_func = hash_func;

    hash->table = (Dlist **)Malloc(sizeof(Dlist *) * (size_t)buck_count);
    /* Explicit NULL stores: every bucket is "not created yet". */
    for (i = 0; i < buck_count; ++i) {
        hash->table[i] = NULL;
    }

    return hash;
}
/*
 * Destroy a hash table and reset the caller's pointer.
 *
 * Order of teardown: every bucket list first, then the bucket array,
 * then the table object itself. A NULL argument or an already-NULL
 * table pointer is ignored.
 */
void destory_hash(Hash **hash)
{
    Hash *doomed = NULL;
    int total = 0;
    int idx = 0;

    if (hash == NULL) {
        return;
    }
    doomed = *hash;
    if (doomed == NULL) {
        return;
    }

    total = doomed->bucket_count;
    while (idx < total) {
        /* destroy_dlist is handed the slot address for each bucket */
        destroy_dlist(&doomed->table[idx]);
        ++idx;
    }

    free(doomed->table);
    free(doomed);
    *hash = NULL;
}
/*
 * Insert a value into the hash table.
 *
 * The table stores the pointer itself (no copy is made), so the pointed-to
 * object must outlive its membership in the table.
 *
 * Returns TRUE when the value was added, FALSE when the table or value is
 * NULL, the table is unusable, the value is already present, or the list
 * insertion fails.
 */
Boolean hash_insert(Hash *hash, const void *value)
{
    int bucket = 0;

    if (hash == NULL || value == NULL
        || hash->bucket_count <= 0 || hash->hash_func == NULL) {
        return FALSE;
    }

    /* Duplicates are rejected. */
    if (hash_serach(hash, value) == TRUE) {
        return FALSE;
    }

    /* In C the sign of a % b follows a, so a negative hash would give a
     * negative index; fold it back into [0, bucket_count). */
    bucket = hash->hash_func(value) % hash->bucket_count;
    if (bucket < 0) {
        bucket += hash->bucket_count;
    }

    /* Create the bucket here if needed instead of relying on a previous
     * lookup having done so. */
    if (hash->table[bucket] == NULL) {
        hash->table[bucket] = init_dlist();
    }

    /* Head insertion; only count the element if it really went in. */
    if (push_front(hash->table[bucket], (void *)value) == FALSE) {
        return FALSE;
    }
    hash->element_count++;
    return TRUE;
}
/*
 * Look a value up in the hash table.
 *
 * When hash_match is set it decides equality; otherwise two entries are
 * equal only if they are the same pointer.
 *
 * Side effect: if the bucket the value hashes to has not been created yet,
 * it is created (empty) before FALSE is returned. This is kept for
 * compatibility with callers that expect the bucket to exist after a lookup.
 *
 * Returns TRUE if the value is present, FALSE otherwise (including for a
 * NULL table/value or an unusable table).
 */
Boolean hash_serach(Hash *hash, const void *value)
{
    int bucket = 0;
    Dlist_node *p_node = NULL;

    if (hash == NULL || value == NULL
        || hash->bucket_count <= 0 || hash->hash_func == NULL) {
        return FALSE;
    }

    /* The sign of % follows the dividend in C; fold a negative result
     * back into [0, bucket_count) so the index is always valid. */
    bucket = hash->hash_func(value) % hash->bucket_count;
    if (bucket < 0) {
        bucket += hash->bucket_count;
    }

    if (hash->table[bucket] == NULL) {
        hash->table[bucket] = init_dlist();
        return FALSE;
    }

    for (p_node = hash->table[bucket]->head; p_node != NULL;
         p_node = p_node->next) {
        if (hash->hash_match != NULL) {
            if (hash->hash_match(p_node->data, value)) {
                return TRUE;
            }
        } else if (p_node->data == value) {
            return TRUE;
        }
    }
    return FALSE;
}
/*
 * Remove a value from the hash table.
 *
 * Equality is decided by hash_match when it is set, otherwise by pointer
 * identity. Only the first matching entry of the bucket is removed.
 *
 * Returns TRUE if an entry was removed, FALSE if the table/value is NULL,
 * the table is empty or unusable, or no matching entry exists.
 */
Boolean hash_remove(Hash *hash, const void *value)
{
    int bucket = 0;
    Dlist *list = NULL;
    Dlist_node *p_node = NULL;

    if (hash == NULL || value == NULL || hash->element_count == 0
        || hash->bucket_count <= 0 || hash->hash_func == NULL) {
        return FALSE;
    }

    /* Locate the bucket; fold a negative remainder into range. */
    bucket = hash->hash_func(value) % hash->bucket_count;
    if (bucket < 0) {
        bucket += hash->bucket_count;
    }

    /* Buckets are created lazily, so this one may not exist yet; in that
     * case the value cannot be stored in it. */
    list = hash->table[bucket];
    if (list == NULL) {
        return FALSE;
    }

    for (p_node = list->head; p_node != NULL; p_node = p_node->next) {
        Boolean found = FALSE;

        if (hash->hash_match != NULL) {
            found = hash->hash_match(p_node->data, value) ? TRUE : FALSE;
        } else {
            found = (p_node->data == value) ? TRUE : FALSE;
        }

        if (found) {
            /* p_node is released by the call; return without touching it. */
            remove_dlist_node(list, p_node);
            hash->element_count--;
            return TRUE;
        }
    }
    return FALSE;
}
void print_hash_table(const Hash *hash) //打印hash表
{
int i = 0;
int bucket_size = 0;
if(hash == NULL || hash ->element_count == 0){
return ;
}
bucket_size = hash ->bucket_count;
//从下标为0的位置开始遍历
for(i = 0;i < hash ->bucket_count; ++i){
printf("bucket[%d]:",i);
print_dlist(hash ->table[i],print_int);
}
}
/*
 * Report how many elements the table currently holds.
 * Returns -1 when no table is given.
 */
int get_element_count(const Hash *hash)
{
    return (hash != NULL) ? hash->element_count : -1;
}
/*
 * Hash function for int keys: the hash is the int value that key points to.
 *
 * key: pointer to an int. A NULL key hashes to 0 instead of being
 *      dereferenced.
 *
 * The result may be negative; callers reduce it to a bucket index.
 */
int int_hash_func(const void *key)
{
    if (key == NULL) {
        return 0;
    }
    /* Read through a const-qualified pointer; the key is never modified. */
    return *(const int *)key;
}
</strong></span>
在哈希表中的每个槽中都记录了双端链表的控制信息,dlist.c和dlist.h的具体实现如下:
<span style="font-size:18px;"><strong>dlist.h :
#ifndef _DLIST_H_
#define _DLIST_H_
#include "tools.h"
/* Named element counts used when testing whether a list is empty (ZERO)
 * or holds exactly one node (ONE). */
enum Count {
    ZERO = 0,
    ONE = 1
};
// Callback that prints one element; it receives the node's data pointer.
typedef void (*Print_func)(void *value);
// List node type
typedef struct Dlist_node{
struct Dlist_node *prev; // predecessor
struct Dlist_node *next; // successor
void *data; // payload; void * so a node can refer to data of any type
}Dlist_node;
// List control block: the two ends of the list, its length, and optional
// per-list policies for the data the nodes point to.
typedef struct Dlist{
struct Dlist_node *head; // first node
struct Dlist_node *tail; // last node
int count; // number of nodes
// policy for releasing what data points to (may be NULL: nothing is released)
void (*free)(void *ptr);
// policy for deciding whether two data values are equal
Boolean (*match)(void *value1, void *value2);
// policy for copying what data points to
void *(*copy_node)(void *value);
}Dlist;
// Generic list interface
Dlist *init_dlist(void) ; // create an empty list
void destroy_dlist(Dlist **dlist); // destroy a list and set *dlist to NULL
Boolean push_front(Dlist *dlist, void *value); // insert at the head
Boolean push_back(Dlist *dlist, void *value) ; // insert at the tail
Boolean pop_front(Dlist *dlist); // remove the head node
Boolean pop_back(Dlist *dlist) ; // remove the tail node
Dlist_node *find_node(Dlist *dlist, void *value); // look up a node by value
Boolean insert_prev(Dlist *dlist, Dlist_node *node,
void *value); // insert before the given node
Boolean insert_next(Dlist *dlist, Dlist_node *node,
void *value); // insert after the given node
Boolean remove_dlist_node(Dlist *dlist, Dlist_node *node); // remove the given node
void print_dlist(Dlist *dlist, Print_func print) ; // print every element with the callback
Boolean get_front(Dlist *dlist, void **value); // fetch the head node's data
Boolean get_tail(Dlist *dlist, void **value) ; // fetch the tail node's data
int get_dlist_count(Dlist *dlist); // number of nodes, -1 for a NULL list
#endif
</strong></span>
dlist.c:
<span style="font-size:18px;"><strong>#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include "dlist.h"
#include "tools.h"
Dlist *init_dlist(void) //双端链表的初始化
{
Dlist *dlist = (Dlist *)Malloc(sizeof(Dlist));
bzero(dlist,sizeof(Dlist));
return dlist;
}
/*
 * Destroy a list and reset the caller's pointer to NULL.
 *
 * Nodes are removed from the head one by one until the count reaches
 * zero, then the control block itself is released. A NULL argument or an
 * already-NULL list pointer is ignored.
 */
void destroy_dlist(Dlist **dlist)
{
    Dlist *list = NULL;

    if (dlist == NULL) {
        return;
    }
    list = *dlist;
    if (list == NULL) {
        return;
    }

    /* Step 1: drop every node. */
    while (list->count != 0) {
        pop_front(list);
    }

    /* Step 2: drop the control block. */
    free(list);
    *dlist = NULL;
}
/*
 * Insert a value at the head of the list.
 *
 * The node stores the pointer itself; the pointed-to data is not copied.
 *
 * Returns TRUE on success, FALSE if dlist or value is NULL.
 */
Boolean push_front(Dlist *dlist, void *value)
{
    Dlist_node *p_node = NULL;

    if (dlist == NULL || value == NULL) {
        return FALSE;
    }

    p_node = (Dlist_node *)Malloc(sizeof(Dlist_node));
    p_node->data = value;
    /* The allocation is not zeroed, so both links must be set explicitly;
     * otherwise traversal would follow garbage pointers. */
    p_node->prev = NULL;
    p_node->next = NULL;

    if (dlist->count == ZERO) {
        dlist->head = dlist->tail = p_node;
    } else {
        p_node->next = dlist->head;
        dlist->head->prev = p_node;
        dlist->head = p_node;
    }
    dlist->count++;
    return TRUE;
}
/*
 * Insert a value at the tail of the list.
 *
 * The node stores the pointer itself; the pointed-to data is not copied.
 *
 * Returns TRUE on success, FALSE if dlist or value is NULL.
 */
Boolean push_back(Dlist *dlist, void *value)
{
    Dlist_node *p_node = NULL;

    if (dlist == NULL || value == NULL) {
        return FALSE;
    }

    p_node = (Dlist_node *)Malloc(sizeof(Dlist_node));
    p_node->data = value;
    /* The allocation is not zeroed: initialise both links, including prev
     * for the case where this becomes the only node. */
    p_node->prev = NULL;
    p_node->next = NULL;

    if (dlist->count == ZERO) {
        dlist->head = dlist->tail = p_node;
    } else {
        dlist->tail->next = p_node;
        p_node->prev = dlist->tail;
        dlist->tail = p_node;
    }
    dlist->count++;
    return TRUE;
}
/*
 * Remove the head node of the list.
 *
 * If the list has a free policy it is applied to the node's data before
 * the node itself is released (the data may or may not be heap memory,
 * which is why the policy is optional).
 *
 * Returns TRUE if a node was removed, FALSE for a NULL or empty list.
 */
Boolean pop_front(Dlist *dlist)
{
    Dlist_node *victim = NULL;

    if (dlist == NULL || dlist->count == ZERO) {
        return FALSE;
    }

    victim = dlist->head;
    if (dlist->count != ONE) {
        /* More than one node: the second node becomes the new head. */
        dlist->head = victim->next;
        dlist->head->prev = NULL;
    } else {
        /* Removing the only node empties the list. */
        dlist->head = NULL;
        dlist->tail = NULL;
    }

    if (dlist->free != NULL) {
        dlist->free(victim->data);
    }
    free(victim);
    dlist->count--;
    return TRUE;
}
/*
 * Remove the tail node of the list.
 *
 * If the list has a free policy it is applied to the node's data before
 * the node itself is released.
 *
 * Returns TRUE if a node was removed, FALSE for a NULL or empty list.
 */
Boolean pop_back(Dlist *dlist)
{
    Dlist_node *p_node = NULL;

    if (dlist == NULL || dlist->count == ZERO) {
        return FALSE;
    }

    p_node = dlist->tail;
    if (dlist->count == ONE) {
        dlist->head = dlist->tail = NULL;
    } else {
        dlist->tail = p_node->prev;
        dlist->tail->next = NULL;
    }

    /* The policy releases the payload, not the node: passing the node here
     * would leak the data and free the node twice. */
    if (dlist->free) {
        dlist->free(p_node->data);
    }
    free(p_node);
    dlist->count--;
    return TRUE;
}
/*
 * Find the first node whose data equals value.
 *
 * Equality is decided by the list's match policy when one is set (a
 * Boolean predicate: non-zero means "equal"); otherwise two values are
 * equal only if they are the same pointer.
 *
 * Returns the matching node, or NULL if there is none or if the list is
 * NULL/empty or value is NULL.
 */
Dlist_node *find_node(Dlist *dlist, void *value)
{
    Dlist_node *p = NULL;

    if (dlist == NULL || dlist->count == ZERO || value == NULL) {
        return NULL;
    }

    for (p = dlist->head; p != NULL; p = p->next) {
        if (dlist->match != NULL) {
            /* match is an equality predicate, so a TRUE result is a hit. */
            if (dlist->match(p->data, value)) {
                return p;
            }
        } else if (p->data == value) {
            return p;
        }
    }
    return NULL;
}
/*
 * Insert a value immediately before the given node.
 *
 * Returns TRUE on success, FALSE if dlist, node or value is NULL.
 */
Boolean insert_prev(Dlist *dlist, Dlist_node *node, void *value)
{
    Dlist_node *fresh = NULL;
    Dlist_node *before = NULL;

    if (!dlist || !node || !value) {
        return FALSE;
    }

    fresh = (Dlist_node *)Malloc(sizeof *fresh);
    before = node->prev;

    fresh->data = value;
    fresh->prev = before;
    fresh->next = node;

    if (before != NULL) {
        before->next = fresh;
    } else {
        /* node had no predecessor, so the new node becomes the head. */
        dlist->head = fresh;
    }
    node->prev = fresh;

    dlist->count++;
    return TRUE;
}
/*
 * Insert a value immediately after the given node.
 *
 * Returns TRUE on success, FALSE if dlist, node or value is NULL.
 */
Boolean insert_next(Dlist *dlist, Dlist_node *node, void *value)
{
    Dlist_node *fresh = NULL;
    Dlist_node *after = NULL;

    if (!dlist || !node || !value) {
        return FALSE;
    }

    fresh = (Dlist_node *)Malloc(sizeof *fresh);
    after = node->next;

    fresh->data = value;
    fresh->prev = node;
    fresh->next = after;

    if (after != NULL) {
        after->prev = fresh;
    } else {
        /* node had no successor, so the new node becomes the tail. */
        dlist->tail = fresh;
    }
    node->next = fresh;

    dlist->count++;
    return TRUE;
}
/*
 * Remove the given node from the list.
 *
 * The tail and head cases are delegated to pop_back / pop_front; an inner
 * node is unlinked here, its data is passed to the list's free policy (if
 * any), and the node is released.
 *
 * Returns TRUE if the node was removed, FALSE for a NULL/empty list, a NULL
 * node, or a node that is neither an end of the list nor linked on both
 * sides.
 */
Boolean remove_dlist_node(Dlist *dlist, Dlist_node *node)
{
    if (dlist == NULL || dlist->count == ZERO || node == NULL) {
        return FALSE;
    }

    /* Every path must return a value; the end cases forward the result of
     * the pop helpers. */
    if (node == dlist->tail) {
        return pop_back(dlist);
    }
    if (node == dlist->head) {
        return pop_front(dlist);
    }

    /* An inner node always has both neighbours; anything else does not
     * belong to this list in a usable state. */
    if (node->prev == NULL || node->next == NULL) {
        return FALSE;
    }

    node->prev->next = node->next;
    node->next->prev = node->prev;
    if (dlist->free) {
        dlist->free(node->data);
    }
    free(node);
    dlist->count--;
    return TRUE;
}
/*
 * Print every element of the list from head to tail using the supplied
 * callback, followed by a newline.
 *
 * Nothing at all is printed when the list is NULL or empty, or when no
 * callback is given.
 */
void print_dlist(Dlist *dlist, Print_func print)
{
    Dlist_node *cursor = NULL;

    if (dlist == NULL || dlist->count == 0 || print == NULL) {
        return;
    }

    cursor = dlist->head;
    while (cursor != NULL) {
        print(cursor->data);
        cursor = cursor->next;
    }
    printf("\n");
}
/*
 * Fetch the data pointer stored in the head node.
 *
 * On success *value receives the head's data and TRUE is returned. FALSE is
 * returned (and *value is untouched) for a NULL list, a NULL output
 * pointer, or an empty list.
 */
Boolean get_front(Dlist *dlist, void **value)
{
    if (dlist != NULL && value != NULL && dlist->count != ZERO) {
        *value = dlist->head->data;
        return TRUE;
    }
    return FALSE;
}
/*
 * Fetch the data pointer stored in the tail node.
 *
 * On success *value receives the tail's data and TRUE is returned. FALSE is
 * returned (and *value is untouched) for a NULL list, a NULL output
 * pointer, or an empty list.
 */
Boolean get_tail(Dlist *dlist, void **value)
{
    if (dlist != NULL && value != NULL && dlist->count != ZERO) {
        *value = dlist->tail->data;
        return TRUE;
    }
    return FALSE;
}
/*
 * Report the number of nodes in the list.
 * Returns -1 when no list is given.
 */
int get_dlist_count(Dlist *dlist)
{
    return (dlist != NULL) ? dlist->count : -1;
}
</strong></span>
在dlist.c中包含tools.h ,该头文件为一个工具头文件,其中包括包裹函数Malloc和一些简单的函数
其具体实现如下:
tools.h :
<span style="font-size:18px;"><strong>#ifndef _TOOLS_H_
#define _TOOLS_H_
/* size_t is used by Malloc's prototype; include it here so this header
 * does not depend on what the including file happened to include first. */
#include <stddef.h>

/* Boolean type and its two values */
#define TRUE (1)
#define FALSE (0)
typedef unsigned char Boolean;

/* Helper interface */
void *Malloc(size_t size);   /* malloc wrapper that exits on failure */
void print_int(void *value); /* prints the int that value points to */
#endif
</strong></span>
tools.c :
<span style="font-size:18px;"><strong>#include <stdio.h>
#include <stdlib.h>
#include "tools.h"
/*
 * malloc wrapper that never returns NULL.
 *
 * size: number of bytes requested. A request for 0 bytes is rounded up to
 *       1, because malloc(0) is allowed to return NULL and that must not be
 *       mistaken for running out of memory.
 *
 * On allocation failure a message is written to stderr and the process
 * exits with status 1. The returned block is not initialised and must be
 * released with free().
 */
void *Malloc(size_t size)
{
    void *result = malloc(size != 0 ? size : 1);

    if (result == NULL) {
        fprintf(stderr,"the memory is full!\n");
        exit(1);
    }
    return result;
}
/*
 * Print the int that value points to, right-aligned in a field of width 5.
 * value must point to a readable int.
 */
void print_int(void *value)
{
    printf("%5d", *(int *)value);
}
</strong></span>
测试程序:
#include <stdio.h>
#include <stdlib.h>
#include "hash.h"
#include "tools.h"
#define MAXSIZE (100)
/*
 * Demo: fill a 10-bucket hash table with MAXSIZE pseudo-random ints in
 * [0, 1000) and print how they were distributed over the buckets.
 *
 * The table stores pointers into `array`, not copies, so the array has to
 * stay alive for as long as the table exists.
 */
int main(int argc,char **argv)
{
    Hash *hash = NULL;
    int *array = (int *)Malloc(sizeof(int) * MAXSIZE);
    int i = 0;

    (void)argc;
    (void)argv;

    for (i = 0; i < MAXSIZE; ++i) {
        array[i] = rand() % 1000;
    }

    hash = init_hash(10, int_hash_func);

    /* Duplicate values are rejected by hash_insert; that is expected here,
     * so the return value is deliberately not checked. */
    for (i = 0; i < MAXSIZE; ++i) {
        hash_insert(hash, &array[i]);
    }

    print_hash_table(hash);

    /* Tear the table down first: it still refers to elements of array. */
    destory_hash(&hash);
    free(array);
    return 0;
}
假设生成100个随机数,桶的个数为10,可看到其散列的结果:
哈哈 终于弄出来了!!!