哈希表

原创

Sekai_Z 2016-05-22 22:31:33 博主文章分类：数据结构 ©著作权

©著作权归作者所有：来自51CTO博客作者Sekai_Z的原创作品，请联系作者获取转载授权，否则将追究法律责任

HashTable-散列表/哈希表，是根据关键字（key）而直接访问在内存存储位置的数据结构。

它通过一个关键值的函数将所需的数据映射到表中的位置来访问数据，这个映射函数叫做散列函数，存放记录的数组叫做散列表。

直接定址法--取关键字的某个线性函数为散列地址，Hash（Key）= Key 或 Hash（Key）= A*Key + B，A、B为常数。
除留余数法--取关键值被某个不大于散列表长m的数p除后所得的余数为散列地址。Hash（Key）= Key % p。
平方取中法
折叠法
随机数法
数学分析法
线性探测（开放定址法的一种，用于处理哈希冲突，而不是构造散列函数的方法）

程序1.0

#include<iostream>

using namespace std;

// State of a single slot in the open-addressing table.
enum State
{
	EMPTY,   // never occupied; a probe sequence stops here
	DELETE,  // tombstone: the key stored here was removed, probing continues past it
	EXITS,   // holds a live key (identifier spelling kept for source compatibility)
};

// Fixed-capacity hash set using open addressing with linear probing.
//
// Requirements on T: default-constructible, copy-assignable, comparable with
// operator==, usable as the left operand of `key % size_t`, and streamable
// with operator<< (only needed by Print()).
template<class T>
class HashTable
{
public:
	// Creates a table with `capacity` slots, all marked EMPTY.
	HashTable(size_t capacity = 10)
		: _capacity(capacity)
		, _tables(new T[capacity]())      // value-initialised so unused slots hold a defined value
		, _states(new State[capacity])
		, _size(0)
	{
		// Mark every slot as EMPTY.
		// NOTE: do not memset the state array to an arbitrary State: memset
		// writes the given value into every byte, not into every element.
		for (size_t i = 0; i < _capacity; i++)
		{
			_states[i] = EMPTY;
		}
	}

	// Deep copy: duplicates both the key array and the state array.
	// Initialisers are listed in member declaration order.
	HashTable(const HashTable<T>& h)
		: _capacity(h._capacity)
		, _tables(new T[h._capacity]())
		, _states(new State[h._capacity])
		, _size(h._size)
	{
		for (size_t i = 0; i < _capacity; i++)
		{
			_tables[i] = h._tables[i];
			_states[i] = h._states[i];
		}
	}

	// Copy-and-swap assignment: `h` is already a private copy of the right-hand
	// side, so swapping with it is sufficient (and self-assignment safe).
	HashTable<T>& operator=(HashTable<T> h)
	{
		std::swap(_tables, h._tables);
		std::swap(_states, h._states);
		std::swap(_size, h._size);
		std::swap(_capacity, h._capacity);
		return *this;
	}

	~HashTable()
	{
		delete[] _tables;   // delete[] on a null pointer is a no-op
		_tables = NULL;
		delete[] _states;
		_states = NULL;
	}

public:
	// Inserts `key`.
	// Returns true when the key was added; false when the table is full
	// (a message is written to stdout) or the key is already present.
	bool Insert(const T& key)
	{
		if (_size == _capacity)
		{
			std::cout << "hashtable is full";
			return false;
		}

		size_t index = _HashFunc(key);
		size_t target = _capacity;   // sentinel: no reusable slot seen yet

		// Walk the whole probe chain (until an EMPTY slot) so that a copy of the
		// key stored *behind* a tombstone is still detected; remember the first
		// tombstone/empty slot as the place to store the key.
		for (size_t probed = 0; probed < _capacity; ++probed)
		{
			const State st = _states[index];
			if (st == EXITS)
			{
				if (_tables[index] == key)
					return false;
			}
			else
			{
				if (target == _capacity)
					target = index;
				if (st == EMPTY)
					break;
			}
			if (++index == _capacity)   // wrap around to the start of the table
				index = 0;
		}

		if (target == _capacity)   // defensive; unreachable while _size < _capacity
			return false;

		_tables[target] = key;
		_states[target] = EXITS;
		_size++;
		return true;
	}

	// Returns true when `key` is stored in the table.
	// On a miss, "fail" is written to stdout and false is returned.
	bool Find(const T& key) const
	{
		if (_FindIndex(key) != _capacity)
			return true;
		std::cout << "fail";
		return false;
	}

	// Removes `key` by turning its slot into a tombstone.
	// Returns true when a live entry was removed, false when the key is absent.
	bool Remove(const T& key)
	{
		const size_t index = _FindIndex(key);
		if (index == _capacity)
			return false;
		_states[index] = DELETE;
		_size--;
		return true;
	}

	// Writes every slot to stdout as "(index-state:value) " followed by a newline.
	void Print() const
	{
		if (_tables == NULL || _states == NULL)
		{
			std::cout << "EMPTY";
		}
		else
		{
			for (size_t i = 0; i < _capacity; i++)
			{
				std::cout << '(' << i << '-' << static_cast<int>(_states[i])
				          << ':' << _tables[i] << ") ";
			}
		}
		std::cout << std::endl;
	}

private:
	// Returns the index of the slot holding a live copy of `key`,
	// or _capacity when the key is not stored.
	size_t _FindIndex(const T& key) const
	{
		if (_capacity == 0)
			return _capacity;

		size_t index = _HashFunc(key);
		// At most _capacity probes: a table without any EMPTY slot must not loop forever.
		for (size_t probed = 0; probed < _capacity; ++probed)
		{
			if (_states[index] == EMPTY)
				break;   // end of the probe chain
			if (_states[index] == EXITS && _tables[index] == key)
				return index;
			if (++index == _capacity)
				index = 0;
		}
		return _capacity;
	}

	// Maps a key to its home slot (division method). Requires _capacity != 0.
	size_t _HashFunc(const T& key) const
	{
		return key % _capacity;
	}

private:
	size_t _capacity;  // number of slots
	T* _tables;        // stored keys
	State* _states;    // per-slot state, parallel to _tables
	size_t _size;      // number of live keys
};

哈希冲突：

不同的Key值经过哈希函数Hash(Key)处理以后可能产生相同的哈希地址，我们称这种情况为哈希冲突。任意的散列函数都不能避免产生冲突。

散列表的荷载因子：α=填入表的元素个数/散列表长度

填入表的元素个数越多，产生冲突的可能性越大，反之，越小

荷载因子应严格限制在0.7~0.8以下；超过0.8时，查表过程中CPU的缓存不命中率会明显上升。

故以下方法为检查容量，若荷载因子过大则进行扩容

程序2.0：

#include<iostream>

using namespace std;

// State of a single slot in the open-addressing table.
enum State
{
	EMPTY,   // never occupied; a probe sequence stops here
	DELETE,  // tombstone: the key stored here was removed, probing continues past it
	EXITS,   // holds a live key (identifier spelling kept for source compatibility)
};

// Growable hash set using open addressing with linear probing.
// The table doubles its capacity once the load factor reaches 0.7.
//
// Requirements on T: default-constructible, copy-assignable, comparable with
// operator==, usable as the left operand of `key % size_t`, and streamable
// with operator<< (only needed by Print()).
template<class T>
class HashTable
{
public:
	// Creates a table with `capacity` slots, all marked EMPTY.
	HashTable(size_t capacity = 10)
		: _capacity(capacity)
		, _tables(new T[capacity]())      // value-initialised so unused slots hold a defined value
		, _states(new State[capacity])
		, _size(0)
	{
		// Mark every slot as EMPTY.
		// NOTE: do not memset the state array to an arbitrary State: memset
		// writes the given value into every byte, not into every element.
		for (size_t i = 0; i < _capacity; i++)
		{
			_states[i] = EMPTY;
		}
	}

	// Deep copy: duplicates both the key array and the state array.
	// Initialisers are listed in member declaration order.
	HashTable(const HashTable<T>& h)
		: _capacity(h._capacity)
		, _tables(new T[h._capacity]())
		, _states(new State[h._capacity])
		, _size(h._size)
	{
		for (size_t i = 0; i < _capacity; i++)
		{
			_tables[i] = h._tables[i];
			_states[i] = h._states[i];
		}
	}

	// Copy-and-swap assignment: `h` is already a private copy of the right-hand
	// side, so swapping with it is sufficient (and self-assignment safe).
	HashTable<T>& operator=(HashTable<T> h)
	{
		_swap(h);
		return *this;
	}

	~HashTable()
	{
		delete[] _tables;   // delete[] on a null pointer is a no-op
		_tables = NULL;
		delete[] _states;
		_states = NULL;
	}

public:
	// Inserts `key`, growing the table first when the load factor is too high.
	// Returns true when the key was added, false when it is already present.
	bool Insert(const T& key)
	{
		Checkcapacity();

		size_t index = _HashFunc(key);
		size_t target = _capacity;   // sentinel: no reusable slot seen yet

		// Walk the whole probe chain (until an EMPTY slot) so that a copy of the
		// key stored *behind* a tombstone is still detected; remember the first
		// tombstone/empty slot as the place to store the key.
		for (size_t probed = 0; probed < _capacity; ++probed)
		{
			const State st = _states[index];
			if (st == EXITS)
			{
				if (_tables[index] == key)
					return false;
			}
			else
			{
				if (target == _capacity)
					target = index;
				if (st == EMPTY)
					break;
			}
			if (++index == _capacity)   // wrap around to the start of the table
				index = 0;
		}

		if (target == _capacity)   // defensive; Checkcapacity() keeps free slots available
			return false;

		_tables[target] = key;
		_states[target] = EXITS;
		_size++;
		return true;
	}

	// Returns true when `key` is stored in the table.
	// On a miss, "fail" is written to stdout and false is returned.
	bool Find(const T& key) const
	{
		if (_FindIndex(key) != _capacity)
			return true;
		std::cout << "fail";
		return false;
	}

	// Removes `key` by turning its slot into a tombstone.
	// Returns true when a live entry was removed, false when the key is absent.
	bool Remove(const T& key)
	{
		const size_t index = _FindIndex(key);
		if (index == _capacity)
			return false;
		_states[index] = DELETE;
		_size--;
		return true;
	}

	// Writes every slot to stdout as "(index-state:value) " followed by a newline.
	void Print() const
	{
		if (_tables == NULL || _states == NULL)
		{
			std::cout << "EMPTY";
		}
		else
		{
			for (size_t i = 0; i < _capacity; i++)
			{
				std::cout << '(' << i << '-' << static_cast<int>(_states[i])
				          << ':' << _tables[i] << ") ";
			}
		}
		std::cout << std::endl;
	}

private:
	// Doubles the capacity once the load factor (live keys / slots) reaches 0.7.
	// A ">=" comparison is used: with small capacities the integer quotient
	// _size*10/_capacity can jump straight past 7, so an "== 7" test would never
	// fire and the table would fill up completely.
	void Checkcapacity()
	{
		if (_capacity != 0 && _size * 10 < _capacity * 7)
			return;

		// Rehash every live key into a larger table, then take over its storage.
		// Tombstones are dropped in the process.
		HashTable<T> tmp(_capacity == 0 ? 1 : 2 * _capacity);
		for (size_t i = 0; i < _capacity; i++)
		{
			if (_states[i] == EXITS)
			{
				tmp.Insert(_tables[i]);
			}
		}
		this->_swap(tmp);
	}

	// Exchanges the complete contents of two tables.
	void _swap(HashTable<T>& h)
	{
		std::swap(_tables, h._tables);
		std::swap(_states, h._states);
		std::swap(_capacity, h._capacity);
		std::swap(_size, h._size);
	}

	// Returns the index of the slot holding a live copy of `key`,
	// or _capacity when the key is not stored.
	size_t _FindIndex(const T& key) const
	{
		if (_capacity == 0)
			return _capacity;

		size_t index = _HashFunc(key);
		// At most _capacity probes: a table without any EMPTY slot must not loop forever.
		for (size_t probed = 0; probed < _capacity; ++probed)
		{
			if (_states[index] == EMPTY)
				break;   // end of the probe chain
			if (_states[index] == EXITS && _tables[index] == key)
				return index;
			if (++index == _capacity)
				index = 0;
		}
		return _capacity;
	}

	// Maps a key to its home slot (division method). Requires _capacity != 0.
	size_t _HashFunc(const T& key) const
	{
		return key % _capacity;
	}

private:
	size_t _capacity;  // number of slots
	T* _tables;        // stored keys
	State* _states;    // per-slot state, parallel to _tables
	size_t _size;      // number of live keys
};

程序3.0

解决哈希冲突的另一个方法就是哈希桶，用链表来将同一位置上的数据链接在一起

#include<iostream>
#include<vector>
using namespace std;

// One node of a bucket's singly linked chain.
template<class K,class V>
struct HashTableNode
{
	V _value;
	K _key;
	HashTableNode<K, V>* _next;   // next node in the same bucket, NULL at the tail

	HashTableNode(const K& key, const V& value)
		: _value(value)
		, _key(key)
		, _next(NULL)
	{}
};

// Hash map with separate chaining: every bucket holds a singly linked list
// of the nodes whose keys hash to that bucket. Keys are unique.
//
// Requirements on K: copyable, comparable with operator==, and usable as the
// left operand of `key % size_t`. K and V must be streamable for print().
template<class K,class V>
class HashTable
{
	typedef HashTableNode<K, V> Node;

public:
	// Creates an empty table with the smallest bucket count from the prime list.
	HashTable()
		: _size(0)
	{
		_table.resize(_GetPrimeSize());
	}

	// Deep copy. Nodes are cloned bucket by bucket (same bucket count, same
	// order inside each chain), and _size is counted while cloning so it
	// cannot be counted twice.
	HashTable(const HashTable<K, V>& h)
		: _table(h._table.size())
		, _size(0)
	{
		try
		{
			for (size_t i = 0; i < h._table.size(); i++)
			{
				Node** tail = &_table[i];
				for (const Node* cur = h._table[i]; cur != NULL; cur = cur->_next)
				{
					*tail = new Node(cur->_key, cur->_value);
					tail = &(*tail)->_next;
					++_size;
				}
			}
		}
		catch (...)
		{
			// The destructor does not run for a partially constructed object,
			// so release the nodes cloned so far before propagating.
			_Clear();
			throw;
		}
	}

	// Copy-and-swap assignment: `h` is a private copy whose destructor
	// releases the nodes previously owned by *this.
	HashTable<K, V>& operator=(HashTable<K, V> h)
	{
		_table.swap(h._table);
		std::swap(_size, h._size);
		return *this;
	}

	~HashTable()
	{
		_Clear();
	}

public:
	// Inserts the pair (key, value) at the head of its bucket.
	// Returns false, leaving the table unchanged, when `key` is already present.
	bool Insert(const K& key, const V& value)
	{
		_CheckCapacity();
		const size_t index = _HashFunc(key, _table.size());

		// Reject duplicates: uniqueness is defined by the key, not by the value.
		for (const Node* cur = _table[index]; cur != NULL; cur = cur->_next)
		{
			if (cur->_key == key)
				return false;
		}

		Node* node = new Node(key, value);
		node->_next = _table[index];
		_table[index] = node;
		_size++;
		return true;
	}

	// Returns the node stored under `key`, or NULL when the key is absent.
	HashTableNode<K, V>* Find(const K& key)
	{
		if (_table.empty())
			return NULL;

		Node* cur = _table[_HashFunc(key, _table.size())];
		while (cur != NULL && !(cur->_key == key))
		{
			cur = cur->_next;
		}
		return cur;
	}

	// Unlinks and frees the node stored under `key`.
	// Returns true when a node was removed, false when the key is absent.
	bool Remove(const K& key)
	{
		if (_table.empty())
			return false;

		// `link` always points at the pointer that references the current node
		// (the bucket head or a predecessor's _next), so head and interior
		// removals share one code path and an empty bucket is handled naturally.
		Node** link = &_table[_HashFunc(key, _table.size())];
		while (*link != NULL)
		{
			Node* cur = *link;
			if (cur->_key == key)
			{
				*link = cur->_next;
				delete cur;
				_size--;
				return true;
			}
			link = &cur->_next;
		}
		return false;
	}

	// Writes one line per bucket to stdout: "i-[key:value]" for each node in
	// the bucket, or "i-[NULL:NULL]" for an empty bucket.
	void print() const
	{
		for (size_t i = 0; i < _table.size(); i++)
		{
			const Node* cur = _table[i];
			if (cur == NULL)
			{
				std::cout << i << "-[NULL:NULL]";
			}
			for (; cur != NULL; cur = cur->_next)
			{
				std::cout << i << "-[" << cur->_key << ':' << cur->_value << ']';
			}
			std::cout << std::endl;
		}
	}

protected:
	// Maps a key to a bucket index (division method). Requires capacity != 0.
	size_t _HashFunc(K key, size_t capacity) const
	{
		return key % capacity;
	}

	// Grows the bucket array to the next prime size once the number of
	// elements reaches the number of buckets (load factor 1). Existing nodes
	// are relinked into the new buckets; nothing is reallocated.
	void _CheckCapacity()
	{
		if (_size < _table.size())
			return;

		const size_t newSize = _GetPrimeSize();
		if (newSize <= _table.size())   // already at the largest supported size
			return;

		std::vector<Node*> newTables(newSize);
		for (size_t i = 0; i < _table.size(); i++)
		{
			Node* cur = _table[i];
			while (cur != NULL)
			{
				Node* moved = cur;
				cur = cur->_next;
				// Head insertion into the destination bucket.
				const size_t index = _HashFunc(moved->_key, newTables.size());
				moved->_next = newTables[index];
				newTables[index] = moved;
			}
			_table[i] = NULL;
		}
		_table.swap(newTables);
	}

	// Returns the smallest listed prime that is larger than the current bucket
	// count, or the largest listed prime when none is larger. Prime bucket
	// counts help spread keys when hashing by remainder.
	unsigned long _GetPrimeSize() const
	{
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		for (int i = 0; i < _PrimeSize; i++)
		{
			if (_table.size() < _PrimeList[i])
				return _PrimeList[i];
		}
		return _PrimeList[_PrimeSize - 1];   // last valid entry, not one past the end
	}

	// Frees every node and leaves all buckets empty.
	void _Clear()
	{
		for (size_t i = 0; i < _table.size(); i++)
		{
			Node* cur = _table[i];
			while (cur != NULL)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_table[i] = NULL;
		}
		_size = 0;
	}

private:
	std::vector<HashTableNode<K, V>*> _table;  // bucket array; each entry is the head of a chain
	size_t _size;                              // number of stored elements
};