哈希是一种算法:它将指定的数据按一定规律映射到一段空间内,之后又可以按照同样的规律对这些值进行相应的操作。这一段空间称作哈希表,它的查找速度要快于线性的数据结构,同时也快于表格、队列等,所以具有独特的优势。哈希算法一般用于快速查找和加密算法。

   对于最简单的哈希表,由key决定这个值存于哈希表的什么位置,同时为每个位置设置一个状态:如果某个位置插入了数据,就将它的状态设置为EXITS(表示“存在”,代码中沿用这一拼写),其他操作同理。下面实现最简单的哈希表。

namespace First
{

// Occupancy marker stored alongside every slot of the table.
enum State
{
    EMPTY,  // slot has never held a key; a probe sequence ends here
    DELETE, // slot held a key that was removed (tombstone); probing continues past it
    EXITS   // slot holds a live key (identifier spelling kept for compatibility)
};

// Fixed-capacity open-addressing hash set using linear probing.
//
// T must be default-constructible, copy-assignable, comparable with ==,
// usable as the left operand of % with a size_t, and streamable for Print().
// The table never grows: Insert() fails once every slot holds a live key.
template <typename T>
class HashTable
{
public:
    // Creates an empty table with `capacity` slots.
    // Slots are value-initialised so Print() never reads indeterminate values.
    HashTable(size_t capacity = 10)
        : _capacity(capacity)
        , _tables(new T[_capacity]())
        , _states(new State[_capacity])
        , _size(0)
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            _states[i] = EMPTY;
        }
    }

    // Releases both slot arrays.
    ~HashTable()
    {
        delete[] _tables;
        delete[] _states;
    }

    // Deep copy: duplicates every slot together with its state.
    HashTable(const HashTable<T>& h)
        : _capacity(h._capacity)
        , _tables(new T[h._capacity]())
        , _states(new State[h._capacity])
        , _size(h._size)
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            _tables[i] = h._tables[i];
            _states[i] = h._states[i];
        }
    }

    // Copy-and-swap assignment. `h` is already a private copy of the
    // right-hand side, so taking over its members is enough; the old
    // contents are released when `h` is destroyed.
    HashTable& operator=(HashTable<T> h)
    {
        std::swap(_tables, h._tables);
        std::swap(_states, h._states);
        std::swap(_capacity, h._capacity);
        std::swap(_size, h._size);
        return *this;
    }

    // Inserts `key`.
    // Returns true when the key was stored, false when the table is full
    // (a message is printed) or the key is already present.
    bool Insert(const T& key)
    {
        if (_size == _capacity)
        {
            std::cout << "HashTable full" << std::endl;
            return false;
        }

        const size_t home = static_cast<size_t>(HashFunc(key));
        size_t target = _capacity; // sentinel: no reusable slot seen yet

        for (size_t step = 0; step < _capacity; ++step)
        {
            const size_t slot = (home + step) % _capacity;
            if (_states[slot] == EXITS)
            {
                if (_tables[slot] == key)
                {
                    return false; // duplicate
                }
                continue;
            }
            // Remember the first tombstone/empty slot but keep scanning past
            // tombstones: a live copy of the key may sit further down the
            // chain and must not be duplicated.
            if (target == _capacity)
            {
                target = slot;
            }
            if (_states[slot] == EMPTY)
            {
                break; // end of the chain, the key is definitely absent
            }
        }

        // _size < _capacity and the scan covers every slot, so a reusable
        // slot was necessarily found.
        _tables[target] = key;
        _states[target] = EXITS;
        ++_size;
        return true;
    }

    // Reports (and prints) whether `key` is currently stored.
    bool Find(const T& key) const
    {
        const bool found = LocateSlot(key) != _capacity;
        std::cout << (found ? "find succees" : "find fail") << std::endl;
        return found;
    }

    // Removes `key` by turning its slot into a tombstone.
    // Returns true when the key was present, false otherwise.
    bool Remove(const T& key)
    {
        const size_t slot = LocateSlot(key);
        if (slot == _capacity)
        {
            std::cout << "delete fail" << std::endl;
            return false;
        }

        std::cout << "delete key" << std::endl;
        _states[slot] = DELETE;
        --_size; // keep the live count accurate so the slot can be reused
        return true;
    }

    // Prints every slot as "[value,state] " followed by a newline.
    void Print() const
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            std::cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';
        }
        std::cout << std::endl;
    }

protected:
    // Home slot of `key`: key modulo capacity. Must not be called on a
    // zero-capacity table.
    int HashFunc(const T& key) const
    {
        return static_cast<int>(key % _capacity);
    }

private:
    // Walks the linear probe chain of `key` and returns the slot holding a
    // live copy of it, or _capacity when the key is absent.
    size_t LocateSlot(const T& key) const
    {
        if (_capacity == 0)
        {
            return _capacity; // nothing to search, and avoids % 0
        }

        const size_t home = static_cast<size_t>(HashFunc(key));
        for (size_t step = 0; step < _capacity; ++step)
        {
            const size_t slot = (home + step) % _capacity;
            if (_states[slot] == EMPTY)
            {
                break;
            }
            if (_states[slot] == EXITS && _tables[slot] == key)
            {
                return slot;
            }
        }
        return _capacity;
    }

    size_t _capacity; // number of slots
    T* _tables;       // slot values
    State* _states;   // slot occupancy markers, parallel to _tables
    size_t _size;     // number of live keys
};

}

/**************************************/

从上面的代码可以看出,这个哈希表并不适用于实际:首先它是静态的,容量固定,存入的key过多时表会被填满,无法继续插入;同时它用的是线性探测方法,这样降低了cpu的访问命中率。下面实现一种动态的、并且可以设置负载因子的哈希表。

namespace Second // growable table: the load-factor limit keeps probe chains short
{

// Occupancy marker stored alongside every slot of the table.
enum State
{
    EMPTY,  // slot has never held a key; a probe sequence ends here
    DELETE, // slot held a key that was removed (tombstone); probing continues past it
    EXITS   // slot holds a live key (identifier spelling kept for compatibility)
};

// Open-addressing hash set with quadratic probing that doubles its capacity
// once the load factor reaches 0.6.
//
// T must be default-constructible, copy-assignable, comparable with ==,
// usable as the left operand of % with a size_t, and streamable for Print().
//
// Probe sequence for a key whose home slot is h (capacity c):
//   attempts 0 .. c-1   : (h + i*i) % c      (quadratic)
//   attempts c .. 2c-1  : (h + (i - c)) % c  (linear sweep)
// The linear sweep guarantees that every slot is eventually visited, so an
// insertion can only fail when the key is a duplicate.
template <typename T>
class HashTable
{
public:
    // Creates an empty table. A requested capacity of 0 is raised to 1 so
    // that the modulo in the hash functions is always defined.
    HashTable(size_t capacity = 30)
        : _capacity(capacity == 0 ? 1 : capacity)
        , _tables(new T[_capacity]())
        , _states(new State[_capacity])
        , _size(0)
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            _states[i] = EMPTY;
        }
    }

    // Releases both slot arrays.
    ~HashTable()
    {
        delete[] _tables;
        delete[] _states;
    }

    // Deep copy: duplicates every slot together with its state.
    HashTable(const HashTable<T>& h)
        : _capacity(h._capacity)
        , _tables(new T[h._capacity]())
        , _states(new State[h._capacity])
        , _size(h._size)
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            _tables[i] = h._tables[i];
            _states[i] = h._states[i];
        }
    }

    // Copy-and-swap assignment; `h` is already a private copy of the
    // right-hand side and releases the old contents when it is destroyed.
    HashTable& operator=(HashTable<T> h)
    {
        _swap(h);
        return *this;
    }

    // Inserts `key`, growing the table first when the load factor has
    // reached 0.6. Returns true when the key was stored and false when it
    // was already present.
    bool Insert(const T& key)
    {
        _CheckCapacity();

        size_t freeSlot = _capacity;
        if (LocateSlot(key, &freeSlot) != _capacity)
        {
            return false; // duplicate
        }
        if (freeSlot == _capacity)
        {
            return false; // defensive: unreachable while the load factor is below 1
        }

        _tables[freeSlot] = key;
        _states[freeSlot] = EXITS;
        ++_size;
        return true;
    }

    // Reports (and prints) whether `key` is currently stored.
    bool Find(const T& key) const
    {
        const bool found = LocateSlot(key, 0) != _capacity;
        std::cout << (found ? "find success" : "find fail") << std::endl;
        return found;
    }

    // Removes `key` by turning its slot into a tombstone.
    // Returns true when the key was present, false otherwise.
    bool Remove(const T& key)
    {
        const size_t slot = LocateSlot(key, 0);
        if (slot == _capacity)
        {
            return false;
        }
        _states[slot] = DELETE;
        --_size;
        return true;
    }

    // Prints every slot as "[value,state] " followed by a newline.
    void Print() const
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            std::cout << '[' << _tables[i] << ',' << _states[i] << ']' << ' ';
        }
        std::cout << std::endl;
    }

protected:
    // Quadratic step: slot reached from `index` after adding i squared,
    // wrapped to the table size. Computed in size_t so that i*i cannot
    // overflow a signed int.
    int _HashFuncT(int index, int i) const
    {
        const size_t base = static_cast<size_t>(index);
        const size_t step = static_cast<size_t>(i);
        return static_cast<int>((base + step * step) % _capacity);
    }

    // Home slot of `key`: key modulo capacity.
    int _HashFunc(const T& key) const
    {
        return static_cast<int>(key % _capacity);
    }

    // Doubles the capacity and re-inserts every live key once the load
    // factor is at least 0.6. Tombstones are dropped by the rebuild.
    void _CheckCapacity()
    {
        // ">=" rather than an exact integer-division match: the exact form
        // never triggers for some capacities (e.g. 1 or 2).
        if (10 * _size < 6 * _capacity)
        {
            return;
        }

        HashTable<T> bigger(2 * _capacity);
        for (size_t i = 0; i < _capacity; ++i)
        {
            if (_states[i] == EXITS)
            {
                bigger.Insert(_tables[i]);
            }
        }
        _swap(bigger); // `bigger` now owns the old arrays and frees them
    }

    // Exchanges the complete contents of *this and `h`.
    // Takes a reference: a by-value parameter would deep-copy the whole
    // table on every call.
    void _swap(HashTable<T>& h)
    {
        std::swap(_tables, h._tables);
        std::swap(_states, h._states);
        std::swap(_capacity, h._capacity);
        std::swap(_size, h._size);
    }

private:
    // Slot visited on the given attempt of the probe sequence starting at
    // `home` (see the class comment for the sequence).
    size_t ProbeSlot(size_t home, size_t attempt) const
    {
        if (attempt < _capacity)
        {
            return static_cast<size_t>(
                _HashFuncT(static_cast<int>(home), static_cast<int>(attempt)));
        }
        return (home + (attempt - _capacity)) % _capacity;
    }

    // Walks the probe sequence of `key`.
    // Returns the slot holding a live copy of the key, or _capacity when the
    // key is absent. When `firstFree` is non-null it receives the first
    // reusable slot (tombstone or empty) met on the way, or _capacity if
    // there is none.
    size_t LocateSlot(const T& key, size_t* firstFree) const
    {
        const size_t none = _capacity;
        if (firstFree)
        {
            *firstFree = none;
        }

        const size_t home = static_cast<size_t>(_HashFunc(key));
        for (size_t attempt = 0; attempt < 2 * _capacity; ++attempt)
        {
            const size_t slot = ProbeSlot(home, attempt);
            if (_states[slot] == EXITS)
            {
                if (_tables[slot] == key)
                {
                    return slot;
                }
                continue;
            }
            // Tombstones are skipped rather than ending the search, because
            // the key may have been stored further along the sequence.
            if (firstFree && *firstFree == none)
            {
                *firstFree = slot;
            }
            if (_states[slot] == EMPTY)
            {
                break;
            }
        }
        return none;
    }

    size_t _capacity; // number of slots (always >= 1)
    T* _tables;       // slot values
    State* _states;   // slot occupancy markers, parallel to _tables
    size_t _size;     // number of live keys
};

}

/****************************************/

上面的代码对于key形式的哈希表来说,相对第一种已经比较健全了。现在可以利用哈希算法实现一种key/value形式的功能,以支持字典功能:key是一个信息,value是key的一个附带信息。比如说key为学号,那么班级就是附带的信息value;又例如简单的英汉字典。下面进行简单的实现。

namespace Third // key/value (dictionary) variant
{

// Occupancy marker stored alongside every slot of the table.
enum State
{
    EMPTY,  // slot has never held an entry; a probe sequence ends here
    DELETE, // slot held an entry that was removed (tombstone); probing continues past it
    EXITS   // slot holds a live entry (identifier spelling kept for compatibility)
};

// One key/value entry of the dictionary.
template<class T, class V>
struct HashTableNode
{
    // Value-initialises both members so unused slots hold defined values.
    HashTableNode()
        : _key()
        , _value()
    {}

    HashTableNode(const T& key, const V& value)
        : _key(key)
        , _value(value)
    {}

    T _key;
    V _value;
};

// Default hasher: converts the key to size_t. Suitable for integral keys;
// other key types need a custom functor passed as Dictionary's third
// template argument.
template <class T>
struct __HashFunc
{
    size_t operator()(const T& key) const
    {
        return key;
    }
};

// Open-addressing key/value table with quadratic probing that doubles its
// capacity once the load factor reaches 0.6.
//
// T must be default-constructible, copy-assignable and comparable with ==;
// V must be default-constructible and copy-assignable. HashFunc must be
// default-constructible and callable as size_t(const T&).
//
// Probe sequence for a key whose home slot is h (capacity c):
//   attempts 0 .. c-1   : (h + i*i) % c      (quadratic)
//   attempts c .. 2c-1  : (h + (i - c)) % c  (linear sweep)
// The linear sweep guarantees that every slot is eventually visited, so an
// insertion can only fail when the key is a duplicate.
template <class T, class V, class HashFunc = __HashFunc<T> >
class Dictionary
{
public:
    // Creates an empty dictionary. A requested capacity of 0 is raised to 1
    // so that the modulo in the hash functions is always defined.
    Dictionary(size_t capacity = 10)
        : _capacity(capacity == 0 ? 1 : capacity)
        , _tables(new HashTableNode<T, V>[_capacity])
        , _states(new State[_capacity])
        , _size(0)
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            _states[i] = EMPTY; // every slot starts out unused
        }
    }

    // Deep copy. Without it the implicitly generated copy would share the
    // slot arrays and both objects would delete them.
    Dictionary(const Dictionary<T, V, HashFunc>& other)
        : _capacity(other._capacity)
        , _tables(new HashTableNode<T, V>[other._capacity])
        , _states(new State[other._capacity])
        , _size(other._size)
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            _tables[i] = other._tables[i];
            _states[i] = other._states[i];
        }
    }

    // Copy-and-swap assignment; `other` is already a private copy of the
    // right-hand side and releases the old contents when it is destroyed.
    Dictionary& operator=(Dictionary<T, V, HashFunc> other)
    {
        _Swap(other);
        return *this;
    }

    // Releases both slot arrays.
    ~Dictionary()
    {
        delete[] _tables;
        delete[] _states;
    }

    // Inserts the pair (key, value), growing the table first when the load
    // factor has reached 0.6. Returns true when the pair was stored and
    // false when the key was already present (the stored value is kept).
    bool Insert(const T& key, const V& value)
    {
        _CheckCapacity();

        size_t freeSlot = _capacity;
        if (LocateSlot(key, &freeSlot) != _capacity)
        {
            return false; // duplicate key
        }
        if (freeSlot == _capacity)
        {
            return false; // defensive: unreachable while the load factor is below 1
        }

        _tables[freeSlot] = HashTableNode<T, V>(key, value);
        _states[freeSlot] = EXITS;
        ++_size;
        return true;
    }

    // Looks up `key` and prints the outcome.
    // Returns a pointer to the stored entry, or a null pointer when the key
    // is absent. The pointer is invalidated by any later Insert() that
    // grows the table.
    HashTableNode<T, V>* Find(const T& key)
    {
        const size_t slot = LocateSlot(key, 0);
        if (slot == _capacity)
        {
            std::cout << "find fail" << std::endl;
            return nullptr;
        }
        std::cout << "find success" << std::endl;
        return _tables + slot;
    }

    // Removes `key` by turning its slot into a tombstone.
    // Returns true when the key was present, false otherwise.
    bool Remove(const T& key)
    {
        const size_t slot = LocateSlot(key, 0);
        if (slot == _capacity)
        {
            return false;
        }
        _states[slot] = DELETE;
        --_size;
        return true;
    }

    // Prints every slot as "[key,value,state] " followed by a newline.
    void Print()
    {
        for (size_t i = 0; i < _capacity; ++i)
        {
            std::cout << "[" << _tables[i]._key << "," << _tables[i]._value << "," << _states[i] << "]" << " ";
        }
        std::cout << std::endl;
    }

protected:
    // Doubles the capacity and re-inserts every live entry once the load
    // factor is at least 0.6. Tombstones are dropped by the rebuild.
    void _CheckCapacity()
    {
        // ">=" rather than an exact integer-division match: the exact form
        // never triggers for some capacities (e.g. 1 or 2).
        if (10 * _size < 6 * _capacity)
        {
            return;
        }

        Dictionary<T, V, HashFunc> bigger(2 * _capacity);
        for (size_t i = 0; i < _capacity; ++i)
        {
            if (_states[i] == EXITS)
            {
                bigger.Insert(_tables[i]._key, _tables[i]._value);
            }
        }
        _Swap(bigger); // `bigger` now owns the old arrays and frees them
    }

    // Exchanges the complete contents of *this and `tmp`.
    // Takes a reference: with a by-value parameter the arrays handed to
    // *this would still be owned (and later deleted) by the caller's object.
    void _Swap(Dictionary<T, V, HashFunc>& tmp)
    {
        std::swap(_tables, tmp._tables);
        std::swap(_states, tmp._states);
        std::swap(_capacity, tmp._capacity);
        std::swap(_size, tmp._size);
    }

    // Home slot of `key`: the HashFunc functor's result modulo capacity.
    size_t _HashFunonce(const T& key) const
    {
        HashFunc hasher;
        return hasher(key) % _capacity;
    }

    // Quadratic step: slot reached from `index` after adding i squared,
    // wrapped to the table size. Computed in size_t so that i*i cannot
    // overflow a signed int.
    size_t _HashFuntwice(int index, int i) const
    {
        const size_t base = static_cast<size_t>(index);
        const size_t step = static_cast<size_t>(i);
        return (base + step * step) % _capacity;
    }

private:
    // Slot visited on the given attempt of the probe sequence starting at
    // `home` (see the class comment for the sequence).
    size_t ProbeSlot(size_t home, size_t attempt) const
    {
        if (attempt < _capacity)
        {
            return _HashFuntwice(static_cast<int>(home), static_cast<int>(attempt));
        }
        return (home + (attempt - _capacity)) % _capacity;
    }

    // Walks the probe sequence of `key`.
    // Returns the slot holding a live entry for the key, or _capacity when
    // the key is absent. When `firstFree` is non-null it receives the first
    // reusable slot (tombstone or empty) met on the way, or _capacity if
    // there is none.
    size_t LocateSlot(const T& key, size_t* firstFree) const
    {
        const size_t none = _capacity;
        if (firstFree)
        {
            *firstFree = none;
        }

        const size_t home = _HashFunonce(key);
        for (size_t attempt = 0; attempt < 2 * _capacity; ++attempt)
        {
            const size_t slot = ProbeSlot(home, attempt);
            if (_states[slot] == EXITS)
            {
                if (_tables[slot]._key == key)
                {
                    return slot;
                }
                continue;
            }
            // Tombstones are skipped rather than ending the search, because
            // the key may have been stored further along the sequence.
            if (firstFree && *firstFree == none)
            {
                *firstFree = slot;
            }
            if (_states[slot] == EMPTY)
            {
                break;
            }
        }
        return none;
    }

    size_t _capacity;             // number of slots (always >= 1)
    HashTableNode<T, V>* _tables; // slot entries
    State* _states;               // slot occupancy markers, parallel to _tables
    size_t _size;                 // number of live entries
};

}


void test3()//二次探测,负载因子,实现字典的功能

{

/*Third::Dictionary<int, string> h1;

h1.Insert(10, "c语言基础");

h1.Insert(59, "c++基础");

h1.Insert(9, "数据结构");

h1.Insert(19, "Linux");

h1.Insert(18, "网络编程");*/



Third::Dictionary<int,int>h1;

h1.Insert(10, 1);

h1.Insert(59, 2);

h1.Insert(9, 3);

h1.Insert(19,4);

h1.Insert(18, 5);

//h1.Print();


cout<<h1.Find(9)->_value<<endl;



//h1.Remove(9);

//h1.Remove(19);

//h1.Remove(10);

//h1.Print();


}

上述就是对哈希算法的简单应用。