1. Rehashing
- If the table gets too full, the operations will start taking too long, and inserts might fail for closed hashing with quadratic resolution.
- This can happen if there are too many deletions intermixed with insertions. A solution, then, is to build another table that is about twice as big (with associated new hash function) and scan down the entire original hash table, computing the new hash value for each (non-deleted) element and inserting it in the new table.
- This entire operation is called rehashing. This is obviously a very expensive operation – the running time is $O(n)$, since there are $n$ elements to rehash and the table size is roughly $2n$.
- Rehashing can be implemented in several ways with quadratic probing.
- One alternative is to rehash as soon as the table is half full.
- The other extreme is to rehash only when an insertion fails.
- A third, middle of the road, strategy is to rehash when the table reaches a certain load factor. Since performance does degrade as the load factor increases, the third strategy, implemented with a good cutoff, could be best.
不仅仅是哈希表可以进行rehash,其他的数据结构也可以。笔者将根据负载因子,针对线性探测(linear probing)编写一个rehash
的实现。
2. 代码实现
根据DSAA之Open Hash(一)修改如下:
#include <stdio.h>
#include <stdlib.h>
#include <err.h>
/*
 * Print msg followed by the description of the current errno value (via
 * perror) and terminate the process with exit status -1.
 * Wrapped in do/while(0) so the macro behaves as a single statement.
 */
#define handle_error(msg) \
    do {                  \
        perror(msg);      \
        exit(-1);         \
    } while (0)
/*
 * Occupancy state of a single slot.
 * `empty` has the value 0, which is what a calloc-zeroed cell array starts as.
 */
enum state {
    empty = 0,      /* slot has never held a key */
    deleted = 1,    /* slot held a key that was removed afterwards */
    available = 2   /* slot currently holds a live key */
};
typedef enum state STAT;

/* One slot of the table: the stored key plus its occupancy state. */
struct cell {
    int key;
    STAT x;
};
typedef struct cell CELL;

/* Table header: the number of slots and the slot array itself. */
struct hash_tbl {
    unsigned int table_size;
    CELL *the_cells;
};

/* Tables are always handled through a pointer to their header. */
typedef struct hash_tbl *HASH_TABLE;
/* Table lifecycle: allocation, and growth into a table twice as large. */
HASH_TABLE initialize_table(unsigned int table_size);
HASH_TABLE rehash(HASH_TABLE H);

/* Key operations. insert takes the table by address because it may replace it. */
int hash(int key, HASH_TABLE H);
CELL *find(int key, HASH_TABLE H);
void insert(int key, HASH_TABLE *H_ptr);
void delete(int key, HASH_TABLE H);

/*
 * Number of live keys: incremented by insert, decremented by delete and reset
 * by rehash. It is a single global, so the program supports one table at a
 * time. Kept as float so that load / table_size is a floating-point ratio.
 */
float load;
int main (){
int i,n,num;
HASH_TABLE hash_table;
printf("input the table_size :\n");
scanf("%d",&n);
hash_table=initialize_table(n);
printf("insert 1..n/2-1 to your hash_table\n");
for ( i=1;i<=n/2-1;i++)
insert(i,&hash_table);
printf("done\n");
//打印建立的hash table
printf("hash_table:\n");
for(i=0;i<hash_table->table_size;i++){
printf("[%d] ",i);
if((hash_table->the_cells)[i].x == available)
printf("%d \n",(hash_table->the_cells)[i].key);
else
printf(" \n");
}
printf("\n");
//随意输入查询hash table
printf("please input the key you want to find:\n");
scanf("%d",&num);
if(find(num,hash_table)->x != available )
printf("can't find your key %d\n",num);
else
printf("find the key %d\n",num);
//随意输入删除hash table
printf("please input the key you want to delete:\n");
scanf("%d",&num);
delete(num, hash_table);
//打印建立的hash table
printf("hash_table:\n");
for(i=0;i<hash_table->table_size;i++){
printf("[%d] ",i);
if((hash_table->the_cells)[i].x == available)
printf("%d \n",(hash_table->the_cells)[i].key);
else
printf(" \n");
}
printf("\n");
//测试rehash
printf("insert two more to triger rehashing\n");
insert(hash_table->table_size,&hash_table);
insert(hash_table->table_size-1,&hash_table);
printf("after rehash, the table :\n");
for(i=0;i<hash_table->table_size;i++){
printf("[%d] ",i);
if((hash_table->the_cells)[i].x == available)
printf("%d \n",(hash_table->the_cells)[i].key);
else
printf(" \n");
}
printf("\n");
}
/*
 * Map key to a slot index in [0, H->table_size).
 *
 * The key is converted to unsigned before the modulo (exactly what the usual
 * arithmetic conversions do for `int % unsigned int`), so a negative key
 * still yields an in-range index.
 * H->table_size must be non-zero.
 */
int hash(int key, HASH_TABLE H){
    unsigned int slots = H->table_size;
    unsigned int bucket = (unsigned int)key % slots;

    return (int)bucket;
}
/*
 * Grow the table: allocate a new table twice the size of H, reinsert every
 * live key from H into it, release H, and return the new table.
 *
 * H must have been created by initialize_table (or a previous rehash). It is
 * freed here, so the caller must use only the returned table afterwards.
 * Deleted and empty slots are not carried over.
 * Terminates the program if the doubled size does not fit in unsigned int.
 */
HASH_TABLE rehash( HASH_TABLE H ){
    HASH_TABLE old_table = H;
    CELL *old_cells = H->the_cells;
    unsigned int old_size = H->table_size;
    unsigned int new_size = 2 * old_size;
    unsigned int i;

    /* Unsigned doubling wraps on overflow; a wrapped result is smaller. */
    if (new_size < old_size)
        errx(1, "hash table too large to grow");

    /* Easy to forget: the reinsertions below count the live keys again. */
    load = 0;

    /* Get a new, empty table */
    H = initialize_table(new_size);

    /* Scan through old table, reinserting into new */
    for (i = 0; i < old_size; i++) {
        if (old_cells[i].x == available) {
            /*
             * &H is the address of this function's own parameter. That is
             * fine: if insert replaces the table, the local H is updated and
             * the final value is what gets returned.
             */
            insert(old_cells[i].key, &H);
        }
    }

    /* Release both the old slot array and the old header (previously leaked). */
    free(old_cells);
    free(old_table);
    return H;
}
HASH_TABLE initialize_table( unsigned int table_size ){
HASH_TABLE H;
int i;
H = malloc ( sizeof (struct hash_tbl) );
if( H == NULL )
errx(1,"Out of space!!!\n");
H->table_size=table_size;
H->the_cells = calloc( H->table_size, sizeof (CELL));
if( H->the_cells == NULL )
errx(1,"Out of space\n");
return H;
}
/*
 * Locate key in H using linear probing.
 *
 * Returns:
 *   - the slot holding key (its state is `available`) when the key is stored;
 *   - otherwise a slot where the key may be stored: the first deleted slot
 *     met along the probe sequence, or else the empty slot that ended it.
 *     That slot's state is never `available`, so callers can keep testing
 *     `find(...)->x != available` for "not present";
 *   - NULL only when every slot holds some other live key, or the table has
 *     no slots at all.
 *
 * Deleted slots do not stop the search: a key that was stored past a slot
 * which was later deleted must still be found. Stopping there would make
 * such keys unreachable and let insert store them a second time.
 * The scan visits each slot at most once, so it always terminates.
 */
CELL * find( int key, HASH_TABLE H){
    CELL *cells = H->the_cells;
    CELL *reusable = NULL;          /* first deleted slot seen so far */
    unsigned int idx;
    unsigned int probes;

    if (H->table_size == 0)
        return NULL;

    idx = (unsigned int)hash(key, H);
    for (probes = 0; probes < H->table_size; probes++) {
        CELL *cur = &cells[idx];

        if (cur->x == empty) {
            /* End of the probe sequence: the key is not in the table. */
            return reusable != NULL ? reusable : cur;
        }
        if (cur->x == available) {
            if (cur->key == key)
                return cur;
        } else if (reusable == NULL) {
            /* Deleted slot: remember it for reuse, but keep searching. */
            reusable = cur;
        }

        /* Advance with wrap-around. */
        if (++idx >= H->table_size)
            idx = 0;
    }

    /* No empty slot anywhere: reuse a deleted slot if one exists. */
    return reusable;
}
/*
 * Insert key into the table *H_ptr; does nothing if the key is already there.
 *
 * When the key is absent and the load factor (live keys / table_size) has
 * reached 0.4, the table is first replaced by a rehashed one, *H_ptr is
 * updated to point at it, and the insertion is retried. The loop ends once
 * the key is found or has been stored.
 */
void insert( int key, HASH_TABLE * H_ptr ){
    for (;;) {
        HASH_TABLE table = *H_ptr;
        CELL *slot = find(key, table);

        /* find returns a live slot only when it holds this very key. */
        if (slot->x == available)
            return;

        /* Load-factor check: grow first, then retry in the new table. */
        if (load / table->table_size >= 0.4) {
            *H_ptr = rehash(table);
            continue;
        }

        slot->key = key;
        slot->x = available;
        load++;
        return;
    }
}
/*
 * Remove key from H by marking its slot as deleted and decrementing the
 * live-key count.
 *
 * Terminates the program through errx when the key is not in the table.
 */
void delete(int key, HASH_TABLE H){
    CELL *target = find(key, H);

    if (target->x != available) {
        /* errx does not return, so nothing below runs for a missing key. */
        errx(1,"can't find the key\n");
    }

    target->x = deleted;
    load--;
}
3. 结果
[root@localhost ~]# ./5_2
input the table_size :
10
insert 1..n/2-1 to your hash_table
done
hash_table:
[0]
[1] 1
[2] 2
[3] 3
[4] 4
[5]
[6]
[7]
[8]
[9]
please input the key you want to find:
1
find the key 1
please input the key you want to delete:
1
hash_table:
[0]
[1]
[2] 2
[3] 3
[4] 4
[5]
[6]
[7]
[8]
[9]
insert two more to triger rehashing
after rehash, the table :
[0]
[1]
[2] 2
[3] 3
[4] 4
[5]
[6]
[7]
[8]
[9] 9
[10] 10
[11]
[12]
[13]
[14]
[15]
[16]
[17]
[18]
[19]
[root@localhost ~]#
结果显示很好地实现了功能。需要特别强调的是,rehash的时间复杂度是 $O(n)$,取决于输入数据量的大小。