哈希函数的优化

一、字符串转换
youhua.h
//哈希函数的优化:
//一、将静态的改为动态
# pragma once
# include<stdio.h>
# include<string.h>
# include<stdlib.h>
# include<assert.h>
// Modulus used by HashFunc, DetectiveLine and Detective2 when reducing a hash address.
#define MAX_SIZE 10
// Type of the values stored in the table.
// NOTE(review): HashFunc hands a DataType to StrToInt(const char *), so a string type looks intended here -- confirm.
typedef int DataType;
// State of one slot: EXIST = holds a live element, EMPTY = set by HashTableInit and never filled,
// DELETE = the element was removed by HashTableDelete.
typedef enum{
	EXIST, EMPTY, DELETE
}State;
// One slot of the table: the stored value plus its state.
typedef struct HTElem
{
	DataType _data;
	State _state;
}HTElem;
// Hash table whose slot array is allocated on the heap by HashTableInit.
typedef struct HashTable
{
	HTElem *_array;// slot array, allocated dynamically
	int _capacity;// number of slots in _array
 	int _size;// number of valid elements in the table
	int _IsLineDetective;// non-zero: linear probing; zero: the Detective2 (quadratic) path is taken

}HashTable, HT;
// Allocate `capacity` slots, mark every slot EMPTY and record the probing mode.
void HashTableInit(HT *ht, int capacity,int IsLineDetetive);
// Entry point for adding `data`; runs CheckCapacity before hashing the key.
void HashTableInsert(HT *ht, DataType data);
// Delete: mark the slot located by HashTableFind as DELETE and decrement the element count.
void HashTableDelete(HT *ht, DataType data);
// Find: return the slot index holding `data`, or -1 when it is not found.
int HashTableFind(HT *ht, DataType data);
// NOTE(review): declared void, yet the definition ends with `return hashAddr;` -- presumably should return int like DetectiveLine.
void Detective2(int hashAddr, int i);// quadratic probing
int DetectiveLine(int hashAddr);// linear probing
// Exchange the contents of two tables.
void Swap(HT *lht, HT *rht);
// Number of elements in the hash table.
int HashTableSize(HT *ht);
// Whether the hash table is empty (returns non-zero when _size is 0).
int HashTableEmpty(HT *ht);
// Grow the table when the load-factor test in the definition says so; returns 1.
int CheckCapacity(HT *ht);
// Free the slot array and reset capacity and size.
void HashTableDestroy(HT *ht);
// Map a value to a slot index in [0, MAX_SIZE) via StrToInt.
int HashFunc(DataType data);




youhua.c
# include"youhua.h"
# include"Common.h"
// Initialise *ht as an empty table with `capacity` slots.
//   ht             - table to initialise; must not be NULL.
//   capacity       - number of slots to allocate; must be positive.
//   IsLineDetetive - non-zero selects linear probing, zero the quadratic path.
// On a bad capacity or a failed allocation the table is left in a well-defined
// empty state (_array == NULL, _capacity == 0, _size == 0), so callers can detect
// the failure and HashTableDestroy stays safe even when assert() is compiled out.
void HashTableInit(HT *ht, int capacity, int IsLineDetetive)
{
	int i = 0;
	assert(ht);
	// Set every field first: a failure below must not leave garbage behind.
	ht->_array = NULL;
	ht->_size = 0;
	ht->_capacity = 0;
	ht->_IsLineDetective = IsLineDetetive;
	if (capacity <= 0)
	{
		assert(0);
		return;
	}
	ht->_array = malloc(capacity * sizeof *ht->_array);// dynamic storage for the slots
	if (NULL == ht->_array)
	{
		assert(0);
		return;
	}
	// Every slot starts out empty.
	for (; i < capacity; ++i)
		ht->_array[i]._state = EMPTY;
	ht->_capacity = capacity;
}
// Insert `data` into the table.
//   ht   - initialised table; must not be NULL.
//   data - value to store; a value already present is not stored twice.
// The original body called HashTableDestroy(&data) (treating the key as a table)
// and stopped after computing the hash address, so nothing was ever stored.
// This version grows the table via CheckCapacity, then probes from the home slot:
// linear probing steps by 1, the quadratic path uses the same increment as
// Detective2 (hashAddr + 2*i + 1). All addresses are reduced modulo the table's
// real capacity. The first DELETE/EMPTY slot on the probe path is reused.
// If no free slot is reachable within _capacity probes the value is not inserted.
void HashTableInsert(HT *ht, DataType data)
{
	int hashAddr = -1;
	int freeAddr = -1;// first reusable slot seen while probing
	int probes = 0;
	int i = 0;
	assert(ht);
	if (!CheckCapacity(ht))
		return;
	if (NULL == ht->_array || ht->_capacity <= 0)
		return;
	// Compute the home address inside the current capacity.
	hashAddr = HashFunc(data) % ht->_capacity;
	for (; probes < ht->_capacity; ++probes)
	{
		State state = ht->_array[hashAddr]._state;
		if (EXIST == state)
		{
			if (data == ht->_array[hashAddr]._data)
				return;// already stored
		}
		else
		{
			if (-1 == freeAddr)
				freeAddr = hashAddr;
			if (EMPTY == state)
				break;// end of the probe chain: no duplicate can follow
		}
		if (ht->_IsLineDetective)
			hashAddr = (hashAddr + 1) % ht->_capacity;
		else
		{
			++i;
			hashAddr = (hashAddr + 2 * i + 1) % ht->_capacity;
		}
	}
	if (-1 == freeAddr)
		return;// no free slot reachable
	ht->_array[freeAddr]._data = data;
	ht->_array[freeAddr]._state = EXIST;
	ht->_size++;
}
// Grow the table when its load factor reaches 0.7.
//   ht - table to check; must not be NULL.
// Returns 1 when the table is usable afterwards (grown or not), 0 when the table
// has no storage or the larger table could not be allocated.
// Fixes over the original:
//   - the rehash loop walks all _capacity slots (it used _size, skipping live
//     elements stored beyond index _size);
//   - Swap receives `ht`, not `&ht` (which is an HT **);
//   - a table without storage no longer divides by zero;
//   - a failed allocation of the new table is reported instead of being used.
int CheckCapacity(HT *ht)
{
	assert(ht);
	if (NULL == ht->_array || ht->_capacity <= 0)
		return 0;
	// Compare against the load factor 0.7 using integer arithmetic.
	if (ht->_size * 10 / ht->_capacity >= 7)
	{
		int NewCapacity = ht->_capacity * 2;// the new table has twice the slots
		HT NewHt;// the new hash table
		int i = 0;
		HashTableInit(&NewHt, NewCapacity, ht->_IsLineDetective);
		if (NULL == NewHt._array)
			return 0;
		// Re-insert every live element of the old table into the new one.
		for (; i < ht->_capacity; ++i)
		{
			if (EXIST == ht->_array[i]._state)
				HashTableInsert(&NewHt, ht->_array[i]._data);
		}
		Swap(ht, &NewHt);// *ht now owns the new storage
		HashTableDestroy(&NewHt);// releases the old storage
	}
	return 1;
}
// Exchange the complete contents of two tables (array pointer, capacity, size, probing mode).
//   lht, rht - tables to exchange; neither may be NULL.
// The original moved the array pointer through an `int` temporary, which truncates
// the address wherever pointers are wider than int (e.g. 64-bit targets).
// Swapping the structs as a whole keeps every field in its own type.
void Swap(HT *lht, HT *rht)
{
	HT tmp;
	assert(lht);
	assert(rht);
	tmp = *lht;
	*lht = *rht;
	*rht = tmp;
}
// Release the slot array of *ht and reset its bookkeeping.
//   ht - table to destroy; must not be NULL.
// A table that owns no array is left untouched.
void HashTableDestroy(HT *ht)
{
	assert(ht);
	if (NULL == ht->_array)// nothing allocated, nothing to do
		return;
	free(ht->_array);
	ht->_size = 0;
	ht->_capacity = 0;
	ht->_array = NULL;
}
// Remove `data` from the table if HashTableFind locates it.
//   ht   - table to modify; must not be NULL.
//   data - value to remove.
// The slot is only marked DELETE (its storage is kept) and _size is decremented.
void HashTableDelete(HT *ht, DataType data)
{
	int slot;
	assert(ht);
	slot = HashTableFind(ht, data);
	if (-1 == slot)// not present
		return;
	ht->_array[slot]._state = DELETE;
	--ht->_size;
}
// Find: look up `data` and return the index of its slot, or -1 when it is absent.
//   ht   - table to search; must not be NULL.
//   data - value to look for.
// Fixes over the original:
//   - the probe address is actually advanced (the result of DetectiveLine was
//     discarded, so a collision looped forever on the same slot);
//   - the search is bounded by _capacity probes for both probing modes;
//   - addresses wrap at the table's real capacity rather than MAX_SIZE.
// The quadratic path uses the same increment as Detective2 (hashAddr + 2*i + 1).
int HashTableFind(HT *ht, DataType data)
{
	int hashAddr = -1;
	int probes = 0;
	int i = 0;
	assert(ht);
	if (NULL == ht->_array || ht->_capacity <= 0)
		return -1;
	hashAddr = HashFunc(data) % ht->_capacity;
	for (; probes < ht->_capacity; ++probes)
	{
		// An EMPTY slot ends the probe chain: the value was never stored.
		if (EMPTY == ht->_array[hashAddr]._state)
			return -1;
		if (EXIST == ht->_array[hashAddr]._state
			&& data == ht->_array[hashAddr]._data)
			return hashAddr;
		// No match here (or a DELETE marker): move on to the next probe address.
		if (ht->_IsLineDetective)
			hashAddr = (hashAddr + 1) % ht->_capacity;
		else
		{
			++i;
			hashAddr = (hashAddr + 2 * i + 1) % ht->_capacity;
		}
	}
	return -1;// probed _capacity slots without finding it
}
// Number of valid elements currently stored in the table.
//   ht - table to query; must not be NULL (now asserted, like the sibling functions).
int HashTableSize(HT *ht)
{
	assert(ht);
	return ht->_size;
}
// Tell whether the table holds no elements.
//   ht - table to query; must not be NULL.
// Returns 1 when _size is 0, otherwise 0.
int HashTableEmpty(HT *ht)
{
	int isEmpty;
	assert(ht);
	isEmpty = (ht->_size == 0);
	return isEmpty;
}
// Linear probing step: return the address after hashAddr, wrapping to 0 when
// the next address equals MAX_SIZE.
int DetectiveLine(int hashAddr)
{
	int next = hashAddr + 1;
	return (MAX_SIZE == next) ? 0 : next;
}
// Quadratic probing step: advances hashAddr by 2*i+1 and reduces it modulo MAX_SIZE once it runs past the end.
// NOTE(review): the function is declared void but ends with `return hashAddr;`, and hashAddr is a
// by-value parameter, so the computed address never reaches the caller. It presumably should
// return int like DetectiveLine (the prototype in the header needs the same change).
void Detective2(int hashAddr, int i)// quadratic probing
{
	hashAddr = hashAddr + 2 * i + 1;
	if (hashAddr >= MAX_SIZE)// ran past the end
		hashAddr %= MAX_SIZE;// wrap back into range
	return hashAddr;
}
// Map `data` to a home address in [0, MAX_SIZE): hash it with StrToInt, then take the remainder.
// NOTE(review): StrToInt expects a const char *, while DataType is declared as int in the
// header -- confirm which key type is intended.
int HashFunc(DataType data)
{
	unsigned int code = StrToInt(data);
	return code % MAX_SIZE;
}

common.h
# pragma once
#include <stddef.h>// size_t comes from the standard header; a hand-written typedef clashes with it where size_t is not unsigned int
// Convert a NUL-terminated string to an unsigned integer hash value.
unsigned int StrToInt(const char * str);

common.c

# define _CRT_SECURE_NO_WARNINGS 1
# include"Common.h"
 
// Hash a NUL-terminated string: hash = hash * 131 + character, for each character in turn.
//   str - string to hash; must not be NULL.
// Returns the hash with its top bit cleared (value in [0, 0x7FFFFFFF]).
unsigned int StrToInt(const char * str)
{
	const unsigned int seed = 131;// other usual multipliers: 31 131 1313 13131 131313
	unsigned int hash = 0;
	const char *p;
	for (p = str; *p != '\0'; ++p)
		hash = hash * seed + *p;
	return hash & 0x7FFFFFFF;
}

二、使用素数进行除留余数法

sushu.h

//哈希函数的优化:
//一、将静态的改为动态
# ifndef __SUSHU_H__
# define __SUSHU_H__
# include<stdio.h>
# include<string.h>
# include<stdlib.h>
# include<assert.h>
// Modulus used by HashFunc, DetectiveLine and Detective2 when reducing a hash address.
#define MAX_SIZE 10
// Type of the values stored in the table.
// NOTE(review): HashFunc hands a DataType to StrToInt(const char *), so a string type looks intended here -- confirm.
typedef int DataType;
// State of one slot: EXIST = holds a live element, EMPTY = set by HashTableInit and never filled,
// DELETE = the element was removed by HashTableDelete.
typedef enum{
	EXIST, EMPTY, DELETE
}State;
// One slot of the table: the stored value plus its state.
typedef struct HTElem
{
	DataType _data;
	State _state;
}HTElem;
// Hash table whose slot array is allocated on the heap by HashTableInit.
typedef struct HashTable
{
	HTElem *_array;// slot array, allocated dynamically
	int _capacity;// number of slots in _array
	int _size;// number of valid elements in the table
	int _IsLineDetective;// non-zero: linear probing; zero: the Detective2 (quadratic) path is taken

}HashTable, HT;
// Allocate `capacity` slots, mark every slot EMPTY and record the probing mode.
void HashTableInit(HT *ht, int capacity, int IsLineDetetive);
// Entry point for adding `data`; runs CheckCapacity before hashing the key.
void HashTableInsert(HT *ht, DataType data);
// Delete: mark the slot located by HashTableFind as DELETE and decrement the element count.
void HashTableDelete(HT *ht, DataType data);
// Find: return the slot index holding `data`, or -1 when it is not found.
int HashTableFind(HT *ht, DataType data);
// NOTE(review): declared void, yet the definition ends with `return hashAddr;` -- presumably should return int like DetectiveLine.
void Detective2(int hashAddr, int i);// quadratic probing
int DetectiveLine(int hashAddr);// linear probing
// Exchange the contents of two tables.
void Swap(HT *lht, HT *rht);
// Number of elements in the hash table.
int HashTableSize(HT *ht);
// Whether the hash table is empty (returns non-zero when _size is 0).
int HashTableEmpty(HT *ht);
// Grow the table when the load-factor test in the definition says so; returns 1.
int CheckCapacity(HT *ht);
// Free the slot array and reset capacity and size.
void HashTableDestroy(HT *ht);
// Map a value to a slot index in [0, MAX_SIZE) via StrToInt.
int HashFunc(DataType data);

#endif


sushu.c

# include"sushu.h"
# include"Common.h"
// Initialise *ht as an empty table with `capacity` slots.
//   ht             - table to initialise; must not be NULL.
//   capacity       - number of slots to allocate; must be positive.
//   IsLineDetetive - non-zero selects linear probing, zero the quadratic path.
// On a bad capacity or a failed allocation the table is left in a well-defined
// empty state (_array == NULL, _capacity == 0, _size == 0), so callers can detect
// the failure and HashTableDestroy stays safe even when assert() is compiled out.
void HashTableInit(HT *ht, int capacity, int IsLineDetetive)
{
	int i = 0;
	assert(ht);
	// Set every field first: a failure below must not leave garbage behind.
	ht->_array = NULL;
	ht->_size = 0;
	ht->_capacity = 0;
	ht->_IsLineDetective = IsLineDetetive;
	if (capacity <= 0)
	{
		assert(0);
		return;
	}
	ht->_array = malloc(capacity * sizeof *ht->_array);// dynamic storage for the slots
	if (NULL == ht->_array)
	{
		assert(0);
		return;
	}
	// Every slot starts out empty.
	for (; i < capacity; ++i)
		ht->_array[i]._state = EMPTY;
	ht->_capacity = capacity;
}
// Insert `data` into the table.
//   ht   - initialised table; must not be NULL.
//   data - value to store; a value already present is not stored twice.
// The original body called HashTableDestroy(&data) (treating the key as a table)
// and stopped after computing the hash address, so nothing was ever stored.
// This version grows the table via CheckCapacity, then probes from the home slot:
// linear probing steps by 1, the quadratic path uses the same increment as
// Detective2 (hashAddr + 2*i + 1). All addresses are reduced modulo the table's
// real capacity. The first DELETE/EMPTY slot on the probe path is reused.
// If no free slot is reachable within _capacity probes the value is not inserted.
void HashTableInsert(HT *ht, DataType data)
{
	int hashAddr = -1;
	int freeAddr = -1;// first reusable slot seen while probing
	int probes = 0;
	int i = 0;
	assert(ht);
	if (!CheckCapacity(ht))
		return;
	if (NULL == ht->_array || ht->_capacity <= 0)
		return;
	// Compute the home address inside the current capacity.
	hashAddr = HashFunc(data) % ht->_capacity;
	for (; probes < ht->_capacity; ++probes)
	{
		State state = ht->_array[hashAddr]._state;
		if (EXIST == state)
		{
			if (data == ht->_array[hashAddr]._data)
				return;// already stored
		}
		else
		{
			if (-1 == freeAddr)
				freeAddr = hashAddr;
			if (EMPTY == state)
				break;// end of the probe chain: no duplicate can follow
		}
		if (ht->_IsLineDetective)
			hashAddr = (hashAddr + 1) % ht->_capacity;
		else
		{
			++i;
			hashAddr = (hashAddr + 2 * i + 1) % ht->_capacity;
		}
	}
	if (-1 == freeAddr)
		return;// no free slot reachable
	ht->_array[freeAddr]._data = data;
	ht->_array[freeAddr]._state = EXIST;
	ht->_size++;
}
// Grow the table when its load factor reaches 0.7.
//   ht - table to check; must not be NULL.
// The new capacity is what GetNextPrime returns for twice the current capacity.
// Returns 1 when the table is usable afterwards (grown or not), 0 when the table
// has no storage or the larger table could not be allocated.
// Fixes over the original:
//   - the rehash loop walks all _capacity slots (it used _size, skipping live
//     elements stored beyond index _size);
//   - Swap receives `ht`, not `&ht` (which is an HT **);
//   - a table without storage no longer divides by zero;
//   - a failed allocation of the new table is reported instead of being used.
int CheckCapacity(HT *ht)
{
	assert(ht);
	if (NULL == ht->_array || ht->_capacity <= 0)
		return 0;
	// Compare against the load factor 0.7 using integer arithmetic.
	if (ht->_size * 10 / ht->_capacity >= 7)
	{
		int NewCapacity = (int)GetNextPrime(ht->_capacity * 2);// prime-sized new table
		HT NewHt;// the new hash table
		int i = 0;
		HashTableInit(&NewHt, NewCapacity, ht->_IsLineDetective);
		if (NULL == NewHt._array)
			return 0;
		// Re-insert every live element of the old table into the new one.
		for (; i < ht->_capacity; ++i)
		{
			if (EXIST == ht->_array[i]._state)
				HashTableInsert(&NewHt, ht->_array[i]._data);
		}
		Swap(ht, &NewHt);// *ht now owns the new storage
		HashTableDestroy(&NewHt);// releases the old storage
	}
	return 1;
}
// Exchange the complete contents of two tables (array pointer, capacity, size, probing mode).
//   lht, rht - tables to exchange; neither may be NULL.
// The original moved the array pointer through an `int` temporary, which truncates
// the address wherever pointers are wider than int (e.g. 64-bit targets).
// Swapping the structs as a whole keeps every field in its own type.
void Swap(HT *lht, HT *rht)
{
	HT tmp;
	assert(lht);
	assert(rht);
	tmp = *lht;
	*lht = *rht;
	*rht = tmp;
}
// Release the slot array of *ht and reset its bookkeeping.
//   ht - table to destroy; must not be NULL.
// A table that owns no array is left untouched.
void HashTableDestroy(HT *ht)
{
	assert(ht);
	if (NULL == ht->_array)// nothing allocated, nothing to do
		return;
	free(ht->_array);
	ht->_size = 0;
	ht->_capacity = 0;
	ht->_array = NULL;
}
// Remove `data` from the table if HashTableFind locates it.
//   ht   - table to modify; must not be NULL.
//   data - value to remove.
// The slot is only marked DELETE (its storage is kept) and _size is decremented.
void HashTableDelete(HT *ht, DataType data)
{
	int slot;
	assert(ht);
	slot = HashTableFind(ht, data);
	if (-1 == slot)// not present
		return;
	ht->_array[slot]._state = DELETE;
	--ht->_size;
}
// Find: look up `data` and return the index of its slot, or -1 when it is absent.
//   ht   - table to search; must not be NULL.
//   data - value to look for.
// Fixes over the original:
//   - the probe address is actually advanced (the result of DetectiveLine was
//     discarded, so a collision looped forever on the same slot);
//   - the search is bounded by _capacity probes for both probing modes;
//   - addresses wrap at the table's real capacity rather than MAX_SIZE.
// The quadratic path uses the same increment as Detective2 (hashAddr + 2*i + 1).
int HashTableFind(HT *ht, DataType data)
{
	int hashAddr = -1;
	int probes = 0;
	int i = 0;
	assert(ht);
	if (NULL == ht->_array || ht->_capacity <= 0)
		return -1;
	hashAddr = HashFunc(data) % ht->_capacity;
	for (; probes < ht->_capacity; ++probes)
	{
		// An EMPTY slot ends the probe chain: the value was never stored.
		if (EMPTY == ht->_array[hashAddr]._state)
			return -1;
		if (EXIST == ht->_array[hashAddr]._state
			&& data == ht->_array[hashAddr]._data)
			return hashAddr;
		// No match here (or a DELETE marker): move on to the next probe address.
		if (ht->_IsLineDetective)
			hashAddr = (hashAddr + 1) % ht->_capacity;
		else
		{
			++i;
			hashAddr = (hashAddr + 2 * i + 1) % ht->_capacity;
		}
	}
	return -1;// probed _capacity slots without finding it
}
// Number of valid elements currently stored in the table.
//   ht - table to query; must not be NULL (now asserted, like the sibling functions).
int HashTableSize(HT *ht)
{
	assert(ht);
	return ht->_size;
}
// Tell whether the table holds no elements.
//   ht - table to query; must not be NULL.
// Returns 1 when _size is 0, otherwise 0.
int HashTableEmpty(HT *ht)
{
	int isEmpty;
	assert(ht);
	isEmpty = (ht->_size == 0);
	return isEmpty;
}
// Linear probing step: return the address after hashAddr, wrapping to 0 when
// the next address equals MAX_SIZE.
int DetectiveLine(int hashAddr)
{
	int next = hashAddr + 1;
	return (MAX_SIZE == next) ? 0 : next;
}
// Quadratic probing step: advances hashAddr by 2*i+1 and reduces it modulo MAX_SIZE once it runs past the end.
// NOTE(review): the function is declared void but ends with `return hashAddr;`, and hashAddr is a
// by-value parameter, so the computed address never reaches the caller. It presumably should
// return int like DetectiveLine (the prototype in the header needs the same change).
void Detective2(int hashAddr, int i)// quadratic probing
{
	hashAddr = hashAddr + 2 * i + 1;
	if (hashAddr >= MAX_SIZE)// ran past the end
		hashAddr %= MAX_SIZE;// wrap back into range
	return hashAddr;
}
// Map `data` to a home address in [0, MAX_SIZE): hash it with StrToInt, then take the remainder.
// NOTE(review): StrToInt expects a const char *, while DataType is declared as int in the
// header -- confirm which key type is intended.
int HashFunc(DataType data)
{
	unsigned int code = StrToInt(data);
	return code % MAX_SIZE;
}

common.h

# pragma once
#include <stddef.h>// size_t comes from the standard header; a hand-written typedef clashes with it where size_t is not unsigned int
// Return the first prime of the built-in table that is greater than `capacity`.
// The parameter type matches the definition in common.c (it was declared as int here).
size_t GetNextPrime(size_t capacity);
 
// Convert a NUL-terminated string to an unsigned integer hash value.
unsigned int StrToInt(const char * str);

common.c

//闭散列用的少,因为浪费空间
# define _CRT_SECURE_NO_WARNINGS 1
# include"Common.h"
#define _PrimeSize 28// number of entries in _PrimeList; could also be enum{_PrimeSize =28};

// Ascending table of primes used as hash-table capacities to reduce collisions.
// (Switching the element type to long long would allow larger primes.)
const unsigned long _PrimeList[_PrimeSize]=
{
	53ul, 97ul, 193ul, 389ul, 769ul,
	1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
	49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
	1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
	50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457u,
	1610612711ul, 3221225473ul, 4294967291ul
};
// Return the first table prime strictly greater than `capacity`.
// When no entry is larger, the last (largest) prime of the table is returned.
size_t GetNextPrime(size_t capacity)
{
	int idx = 0;
	// Skip every prime that is not larger than the requested capacity.
	while (idx < _PrimeSize && _PrimeList[idx] <= capacity)
		++idx;
	if (idx == _PrimeSize)// capacity is beyond the table: clamp to the last prime
		idx = _PrimeSize - 1;
	return _PrimeList[idx];
}
// Hash a NUL-terminated string: hash = hash * 131 + character, for each character in turn.
//   str - string to hash; must not be NULL.
// Returns the hash with its top bit cleared (value in [0, 0x7FFFFFFF]).
unsigned int StrToInt(const char * str)
{
	const unsigned int seed = 131;// other usual multipliers: 31 131 1313 13131 131313
	unsigned int hash = 0;
	const char *p;
	for (p = str; *p != '\0'; ++p)
		hash = hash * seed + *p;
	return hash & 0x7FFFFFFF;
}



test.c

# include"sushu.h"
  
// Smoke test: build a 10-slot table in linear-probing mode, insert four keys, then release it.
// The original never called HashTableDestroy, leaking the slot array allocated by HashTableInit.
// NOTE(review): the keys are string literals while sushu.h declares DataType as int -- confirm
// the intended key type.
void TestHashTable()
{
	HashTable ht;
	HashTableInit(&ht, 10, 1);
	HashTableInsert(&ht, "欧阳锋");
	HashTableInsert(&ht, "李某某");
	HashTableInsert(&ht, "哈哈");
	HashTableInsert(&ht, "会话");
	HashTableDestroy(&ht);// free the slot array so the test does not leak it
}
// Program entry point: run the hash-table smoke test, then return 0.
int main()
{
	TestHashTable();
	system("pause");// runs the shell command "pause" -- presumably the Windows console command that waits for a key press
	return 0;
}


猜你喜欢

转载自blog.csdn.net/xuruhua/article/details/80316086