DSAA之Closed Hashing Rehash(三)

1. Rehashing

  • If the table gets too full, the running time for the operations will start taking too long and inserts might fail for closed hashing with quadratic resolution.
  • This can happen if there are too many deletions intermixed with insertions. A solution, then, is to build another table that is about twice as big (with associated new hash function) and scan down the entire original hash table, computing the new hash value for each (non-deleted) element and inserting it in the new table.
  • This entire operation is called rehashing. This is obviously a very expensive operation – the running time is $O(n)$, since there are $n$ elements to rehash and the table size is roughly $2n$.
  • Rehashing can be implemented in several ways with quadratic probing.
    • One alternative is to rehash as soon as the table is half full.
    • The other extreme is to rehash only when an insertion fails.
    • A third, middle of the road, strategy is to rehash when the table reaches a certain load factor. Since performance does degrade as the load factor increases, the third strategy, implemented with a good cutoff, could be best.

  不仅仅是哈希表可以进行rehash,其他的数据结构也可以。笔者将根据负载因子,针对线性探测(linear probing)编写一个rehash的实现。

2. 代码实现

  根据DSAA之Open Hash(一)修改如下:

#include <stdio.h>
#include <stdlib.h>
#include <err.h>
/*
 * handle_error(msg): print msg followed by the current errno description
 * (via perror) and terminate the process with exit status -1.
 * The do{...}while(0) wrapper lets the macro be used like a single statement.
 * NOTE(review): no use of this macro appears in the visible listing; the
 * code below reports errors with errx() instead.
 */
#define handle_error(msg) do{ perror(msg); exit(-1);}while(0)

/*
 * State of one slot in the table.
 *   empty     - never used; this is the zero value, so cells obtained from
 *               calloc() start out empty.
 *   deleted   - held a key that delete() has since removed (tombstone).
 *   available - currently holds a live key (note: "available" here means the
 *               key is available, NOT that the slot is free).
 */
typedef enum state {
    empty     = 0,
    deleted   = 1,
    available = 2
} STAT;

/* One slot: the stored key plus its occupancy state. */
typedef struct cell {
    int  key;   /* meaningful only while x == available */
    STAT x;     /* occupancy state of this slot */
} CELL;

/* The table itself: a slot count and a heap array of that many cells. */
struct hash_tbl {
    unsigned int table_size;   /* number of entries in the_cells */
    CELL        *the_cells;    /* array of table_size cells */
};

/* Handle type used throughout: a pointer to the table header. */
typedef struct hash_tbl *HASH_TABLE;


/* Build a table twice the size of H, re-insert every live key of H into it,
 * and return the new table. */
HASH_TABLE rehash( HASH_TABLE H);
/* Map key to a slot index of H: key modulo H->table_size. */
int hash(int key, HASH_TABLE H);
/* Add key to *H_ptr unless it is already present. When the load ratio
 * (load / table_size) has reached 0.4, *H_ptr is first replaced by the
 * larger table returned from rehash(). */
void insert( int key, HASH_TABLE * H_ptr );
/* Probe linearly starting at hash(key). Returns the cell that holds key, or
 * otherwise a cell whose state is not `available`. */
CELL * find( int key, HASH_TABLE H);
/* Allocate a table with table_size zero-initialised cells; exits through
 * errx() if an allocation fails. */
HASH_TABLE initialize_table( unsigned int table_size );
/* Mark the cell holding key as deleted and decrement load; exits through
 * errx() if key is not in the table. */
void delete(int key, HASH_TABLE H);
/* Number of live keys: incremented by insert(), decremented by delete(),
 * reset to 0 by rehash() before it re-inserts. It is a single global, so it
 * is shared by every table in the program. Stored as float so that
 * load / table_size is evaluated as a floating-point ratio. */
float load;

/* Print every slot index of H; slots that hold a live key also show the key. */
static void print_table( HASH_TABLE H ){
    unsigned int i;
    for(i=0;i<H->table_size;i++){
        printf("[%u] ",i);
        if((H->the_cells)[i].x == available)
            printf("%d \n",(H->the_cells)[i].key);
        else
            printf(" \n");
    }
    printf("\n");
}

/* Read one decimal int from stdin; exits through errx() on EOF or malformed
 * input so callers never see an uninitialised value. */
static int read_int( void ){
    int value;
    if(scanf("%d",&value) != 1)
        errx(1,"invalid input: expected an integer");
    return value;
}

/*
 * Interactive demo: build a table of user-chosen size, fill it with
 * 1..n/2-1, look up one key, delete one key, then insert two more keys to
 * push the load ratio over the rehash threshold, printing the table after
 * each stage.
 */
int main (){
    int i,n,num;
    HASH_TABLE hash_table;

    printf("input the table_size :\n");
    n=read_int();
    /* A zero or negative size would be converted to 0 or a huge unsigned
     * value and break hashing/allocation, so reject it up front. */
    if(n <= 0)
        errx(1,"table_size must be a positive integer");
    hash_table=initialize_table((unsigned int)n);
    printf("insert 1..n/2-1 to your hash_table\n");
    for ( i=1;i<=n/2-1;i++)
        insert(i,&hash_table);
    printf("done\n");

    /* Show the freshly built table. */
    printf("hash_table:\n");
    print_table(hash_table);

    /* Look up a key chosen by the user. */
    printf("please input the key you want to find:\n");
    num=read_int();
    if(find(num,hash_table)->x != available )
        printf("can't find your key %d\n",num);
    else
        printf("find the key %d\n",num);

    /* Delete a key chosen by the user. */
    printf("please input the key you want to delete:\n");
    num=read_int();
    delete(num, hash_table);

    /* Show the table after the deletion. */
    printf("hash_table:\n");
    print_table(hash_table);

    /* Exercise rehash. table_size is re-read for the second insert, so if
     * the first insert already grew the table the second key is derived
     * from the new size. */
    printf("insert two more to triger rehashing\n");
    insert(hash_table->table_size,&hash_table);
    insert(hash_table->table_size-1,&hash_table);
    printf("after rehash, the table :\n");
    print_table(hash_table);

    /* Release the table: the cell array first, then the header. */
    free(hash_table->the_cells);
    free(hash_table);
    return 0;
}

/*
 * Hash function: key modulo the table size.
 * The remainder is computed in unsigned arithmetic (key is converted to
 * unsigned int, exactly as the usual arithmetic conversions do for
 * int % unsigned int), so a negative key still yields an index in
 * [0, table_size). H->table_size must be non-zero.
 */
int hash(int key, HASH_TABLE H){
    unsigned int slots = H->table_size;
    unsigned int bucket = (unsigned int)key % slots;
    return (int)bucket;
}

/*
 * Grow the table: allocate a new table with twice as many cells, re-insert
 * every live key of H into it, and release the old table.
 *
 * H       the table to replace. Both its cell array and its header are
 *         freed here, so the caller must stop using H and switch to the
 *         returned handle.
 * returns the new, larger table.
 *
 * Exits through errx() if doubling the size would overflow unsigned int
 * (or if initialize_table() cannot allocate).
 */
HASH_TABLE rehash( HASH_TABLE H ){
    unsigned int i, old_size, new_size;
    CELL *old_cells;
    HASH_TABLE old_table = H;

    old_cells = H->the_cells;
    old_size = H->table_size;

    /* Unsigned doubling wraps silently; a result that is not strictly larger
     * means the table cannot grow. */
    new_size = 2*old_size;
    if( new_size <= old_size )
        errx(1,"hash table too large to grow");

    /* Easy to forget: the live-key counter must restart from zero because
     * insert() below counts every re-inserted key again. */
    load=0;
    /* Get a new, empty table */
    H = initialize_table( new_size );
    /* Scan through old table, reinserting into new */
    for( i=0; i<old_size; i++ )
        if( old_cells[i].x == available )
            /* &H is the address of this function's own local handle. That is
             * fine: if insert() triggers a nested rehash it updates this
             * local H, which is the value returned below. */
            insert( old_cells[i].key, &H );
    free( old_cells );
    /* The old header was previously leaked; only its cell array was freed. */
    free( old_table );
    return H;
}

HASH_TABLE initialize_table( unsigned int table_size ){
    HASH_TABLE H;
    int i;
    H = malloc ( sizeof (struct hash_tbl) );
    if( H == NULL )
        errx(1,"Out of space!!!\n");
    H->table_size=table_size;
    H->the_cells = calloc( H->table_size, sizeof (CELL));
    if( H->the_cells == NULL )
        errx(1,"Out of space\n");
    return H;
}
/*
 * Locate key in H using linear probing (step 1, wrapping at table_size).
 *
 * returns  - the cell holding key, if key is present (its x == available);
 *          - otherwise a cell that is NOT available and may be used to store
 *            key: the first deleted cell met on the probe path if there was
 *            one, else the empty cell that ended the probe.
 *
 * Deleted cells do not end the search: a key that was inserted past a cell
 * which has since been deleted must still be reachable, so probing only
 * stops at an empty cell or at a match. The probe is limited to table_size
 * steps so a table without empty cells cannot loop forever.
 *
 * Exits through errx() in the degenerate case where every cell holds a live
 * key and none of them is key (there is then no cell to return).
 */
CELL *  find( int key, HASH_TABLE H){
    CELL * ptr=H->the_cells;
    CELL * reusable=NULL;      /* first deleted cell seen on the probe path */
    unsigned int i=(unsigned int)hash(key,H);
    unsigned int probes;

    for(probes=0;probes<H->table_size;probes++){
        if(ptr[i].x == empty)
            /* End of the probe chain: key is absent. Prefer recycling an
             * earlier deleted cell over consuming a fresh empty one. */
            return reusable != NULL ? reusable : &ptr[i];
        if(ptr[i].x == available){
            if(ptr[i].key == key)
                return &ptr[i];
        }
        else if(reusable == NULL)
            reusable=&ptr[i];
        if(++i >= H->table_size)
            i=0;
    }
    /* Every cell was visited without reaching an empty one. */
    if(reusable == NULL)
        errx(1,"hash table is full");
    return reusable;
}

/*
 * Insert key into the table *H_ptr.
 *
 * Does nothing if key is already stored. Otherwise, while the load ratio
 * (load / table_size) is at least 0.4, the table is replaced by the larger
 * one returned from rehash() and the lookup is repeated; once the ratio is
 * below the threshold the key is written into the cell returned by find()
 * and the global load counter is incremented.
 *
 * H_ptr is a pointer to the caller's handle because the handle changes when
 * a rehash happens.
 */
void insert( int key, HASH_TABLE * H_ptr ){
    for(;;){
        HASH_TABLE table=*H_ptr;
        CELL * slot=find( key, table );

        /* Key already present: nothing to do. */
        if( slot->x == available )
            return;

        /* Load-factor check. The comparison is kept in the ">= 0.4" form
         * (float ratio compared against a double constant) so the threshold
         * behaves exactly as before. */
        if( !(load/table->table_size >= 0.4) ){
            slot->key=key;
            slot->x=available;
            load++;
            return;
        }

        /* Too full: grow the table, then retry against the new one. */
        *H_ptr=rehash(table);
    }
}

/*
 * Remove key from H by marking its cell as deleted (the cell is not cleared)
 * and decrementing the global load counter.
 * Exits through errx() if find() does not return a cell holding key.
 */
void delete(int key, HASH_TABLE H){
    CELL * victim=find(key,H);

    if(victim->x == available){
        victim->x=deleted;
        load--;
        return;
    }
    /* errx() terminates the program, so nothing runs after it. */
    errx(1,"can't find the key\n");
}

3. 结果

[root@localhost ~]# ./5_2
input the table_size :
10
insert 1..n/2-1 to your hash_table
done
hash_table:
[0]  
[1] 1 
[2] 2 
[3] 3 
[4] 4 
[5]  
[6]  
[7]  
[8]  
[9]  

please input the key you want to find:
1
find the key 1
please input the key you want to delete:
1
hash_table:
[0]  
[1]  
[2] 2 
[3] 3 
[4] 4 
[5]  
[6]  
[7]  
[8]  
[9]  

insert two more to triger rehashing
after rehash, the table :
[0]  
[1]  
[2] 2 
[3] 3 
[4] 4 
[5]  
[6]  
[7]  
[8]  
[9] 9 
[10] 10 
[11]  
[12]  
[13]  
[14]  
[15]  
[16]  
[17]  
[18]  
[19]  

[root@localhost ~]# 

  结果显示很好地实现了功能。需要特别强调的是,rehash的时间复杂度是 $O(n)$,取决于表中需要重新插入的数据量的大小。

猜你喜欢

转载自blog.csdn.net/lovestackover/article/details/80149653