哈希与拉链法

1、哈希的引入和简单使用

我们看下面一个关于查找的例子:

#include <stdio.h>

/*
 * Linear search: report whether key occurs among the first n elements of a.
 *
 * Returns 1 as soon as a matching element is seen, 0 if none of the n
 * elements equals key (including when n <= 0).
 */
int find_key(int a[], int n, int key)
{
    int idx = 0;

    while (idx < n) {
        if (a[idx] == key) {
            return 1;
        }
        idx++;
    }
    return 0;
}

/*
 * Demo driver: for every value in 1..10, print whether it occurs in a.
 * Each lookup is a linear scan through find_key().
 */
int main()
{
    int a[] = {7, 17, 5, 8, 2, 9, 20, 3, 2, 5};
    int n = (int)(sizeof a / sizeof a[0]);

    // Check whether each of 1..10 is present in a.
    for (int i = 1; i <= 10; i++) {
        if (find_key(a, n, i)) {
            // The %d conversion needs its argument; omitting it is undefined behaviour.
            printf("%d is in array\n", i);
        } else {
            printf("%d is not in array\n", i);
        }
    }
    return 0;
}

查询一个元素的时间复杂度为O(n);如果查询次数为n,那么整体的时间复杂度就是O(n^2);

而通过哈希表就可以将整体的效率优化到线性的O(n);

看一个哈希表的特例情况,假如数据的范围是0-99并且类型是整数,这种情况可以直接使用数组下标来记录元素是否出现。这个就是最简单的哈希思想。

哈希查找:

#include <stdio.h>
#define MAX_TABLE_LEN 100  // Maximum length of the hash table

/*
 * Build a direct-address table: for each of the first n elements of a,
 * increment table[value].
 *
 * a      input values
 * n      number of elements of a to read
 * table  counters indexed by value; the caller supplies at least
 *        MAX_TABLE_LEN entries and zeroes them beforehand
 *
 * Values outside [0, MAX_TABLE_LEN) cannot be used as an index into table,
 * so they are skipped instead of writing out of bounds.
 */
void create_hash(int a[], int n, int table[])
{
    for (int i = 0; i < n; i++) {
        int value = a[i];

        if (value < 0 || value >= MAX_TABLE_LEN) {
            continue;  // not representable in the table
        }
        table[value]++;
    }
}

/*
 * Table lookup: return 1 if the counter stored at table[key] is non-zero,
 * otherwise 0.
 *
 * No bounds check is performed here; key must be a valid index of table.
 */
int find_key(int table[], int key)
{
    if (table[key] == 0) {
        return 0;
    }
    return 1;
}

/*
 * Demo driver: build the counting table from a, list every value that
 * occurs together with its count, then test membership of 0..10.
 */
int main()
{
    int a[] = {7, 17, 5, 8, 2, 9, 20, 3, 2, 5};
    // Table of MAX_TABLE_LEN counters, all initialised to 0.
    int table[MAX_TABLE_LEN] = {0};
    create_hash(a, 10, table);
    printf("in hash table:\n");

    for (int i = 0; i < MAX_TABLE_LEN; i++) {
        if (table[i] > 0) {
            // Was `tale[i]`, an undeclared identifier that did not compile.
            printf("%d appear %d times.\n", i, table[i]);
        }
    }

    printf("test\n");
    for (int i = 0; i <= 10; i++) {
        if (find_key(table, i)) {
            printf("%d is in array.\n", i);
        } else {
            printf("%d is not in array.\n", i);
        }
    }
    return 0;
}

我们通过上述的方法不仅可以用来查找还可以用来排序。如果数组a当中有n个元素,n很大,但是a中元素的范围非常小,比如0-99,那么可以用长度为100的table记录a中每个元素出现的次数。由于table[i]代表了数据 i 出现的次数,在排序的时候,让i从0循环到MAX_TABLE_LEN-1,再依次将table[i]个i写回a中就可以了。

计数排序(运用hash的思想)

#include <stdio.h>
#define MAX_TABLE_LEN 100  // Maximum length of the hash table

/*
 * Counting sort of the first n elements of a, in place, ascending.
 *
 * Pass 1 tallies how many times each value occurs; pass 2 rewrites a by
 * emitting every value as many times as it was counted.
 *
 * Each element is used directly as an index into a MAX_TABLE_LEN-entry
 * tally, so every element must lie in [0, MAX_TABLE_LEN).
 */
void sort(int a[], int n)
{
    int counts[MAX_TABLE_LEN] = {0};
    int idx = 0;

    // Pass 1: tally occurrences of each value.
    while (idx < n) {
        counts[a[idx]]++;
        idx++;
    }

    // Pass 2: write the values back in increasing order.
    int out = 0;
    for (int value = 0; value < MAX_TABLE_LEN; value++) {
        int remaining = counts[value];

        while (remaining > 0) {
            a[out++] = value;
            remaining--;
        }
    }
}

/*
 * Demo driver: sort a sample array with sort() and print the result on
 * one line.
 */
int main()
{
    int a[] = {7, 17, 5, 8, 2, 9, 20, 3, 2, 5};
    sort (a, 10);
    for (int i = 0; i < 10; i++) {
        // Separate the values; printing "%d" back to back makes
        // neighbouring numbers (e.g. 9, 17, 20) indistinguishable.
        printf("%d ", a[i]);
    }
    printf("\n");
    return 0;
}

当n远大于表长的时候,计数排序算法的时间复杂度就是O(n),优于一般排序算法O(nlogn)的平均时间复杂度。

2、哈希表与哈希冲突

哈希表也称为散列表(hash table),它是可以根据关键字的值直接进行查询和访问的数据结构。我们通常通过映射函数将关键字直接对应到表中的某个位置,从而加快查找速度,这个映射函数就是哈希函数,存放记录的数组就叫做哈希表,例如下面的例子:

哈希函数:H(key) = key % 12

如上图,哈希函数是对12取余,在哈希表中a[0]中可能存有48、12等多个数值,即哈希表可能会把两个或两个以上的不同关键字映射到同一个地址,这时就会产生哈希冲突。

解决哈希冲突的几种方法:

1)线性探测法,冲突发生,顺序查看表中的下一个元素,直到找出一个空闲单元

2)平方探测法:设发生冲突的地址为d,平方探测法得到的新的地址序列为:d+1^2, d-1^2, d+2^2, d-2^2, ……

3)拉链法

对于不同的关键字可能会通过哈希函数映射到同一地址,为了避免非同义词发生冲突,可以将所有的同义词存储到一个线性链表中,这个线性链表由散列地址唯一标识。拉链法适用于经常进行插入和删除的情况。上图所表示的就是拉链法。

一些概念:

装填因子:哈希表用装填因子表示一个表的装满程度,等于 表中记录数n / 哈希表长度m。

3、拉链法

下面使用一个demo来设计一个拉链法的使用:

Hashlink.h

#include <iostream>
#include <cmath>
#include <stdlib.h>
#include <math.h>

// Bring in only the stream names the implementation uses. A blanket
// `using namespace std;` also makes std::hash visible, and on C++11 and
// later that turns every unqualified use of the `hash` struct declared in
// this header into an ambiguous reference.
using std::cout;
using std::endl;

#define NUM 15  // number of buckets in the table

// File-scope counter. None of the functions visible in this file read or
// write it; they use the key_num member of the hash struct instead.
// NOTE(review): this is a definition inside a header, so including the header
// from more than one translation unit would define it twice. Confirm whether
// it is still needed.
int key_num = 0;

// One element of a singly linked chain.
struct Node {
        int key;     // key stored in this node
        Node *next;  // following node in the chain, NULL when there is none
};

// Pointer-to-Node alias.
typedef Node *linklist;

// Hash table resolved by separate chaining.
struct hash {
        // One head node per bucket. The head itself holds no user key;
        // the chain of stored keys starts at its `next` pointer.
        Node linknum[NUM];
        // Number of keys currently stored in the table.
        int key_num;
};

// Reset every bucket head of H (key 0, empty chain) and set the key count
// to 0. Prints a confirmation line and returns 0.
int hash_create(hash *H);

// Insert key into H: selects the bucket with hash_calculate(), links a new
// node with link_insert() and increments the key count. Prints a
// confirmation line and returns 0.
int hash_insert(hash *H, int key);

// Map a key to a bucket index using the remainder of key divided by NUM.
int hash_calculate(int key);

// Allocate a node for key and link it into the chain that hangs off
// node_first, keeping the chain in ascending key order.
// Returns 1 when the chain was empty before the insert, 0 otherwise.
int link_insert(Node *node_first, int key);

// Remove the first node holding key from its bucket and free it.
// Returns 0 and decrements the key count on success; prints a message and
// returns -1 when the key is not in the table.
int hash_delete(hash *H, int key);

// Look key up in its bucket and print either its location as
// (bucket, 1-based position in the chain) or a "not present" message.
// Returns 0 when the key is found.
int hash_search(hash *H, int key);

// Print the key count followed by the keys of every bucket, one bucket per
// line. Returns 0.
int hash_print(hash *H);

// Free the chain nodes of every bucket of H. Prints a confirmation line
// and returns 0.
int hash_destroy(hash *H);

Hashlink.cpp

#include "Hashlink.h"

// Map a key to its bucket index in the range [0, NUM).
//
// The built-in % keeps the sign of the dividend, so a negative key would
// give a negative remainder and therefore an out-of-bounds bucket index.
// Fold such remainders back into range.
int hash_calculate(int key)
{
        int index = key % NUM;
        if (index < 0) {
                index += NUM;
        }
        return index;
}

// Insert key into the chain that hangs off the head node node_first,
// keeping the chain sorted in ascending key order.
//
// node_first  bucket head; its own key is not part of the chain
// key         value to store
//
// Returns 1 when the chain was empty before the insert and 0 otherwise
// (the return values the original implementation produced).
//
// A key equal to one already in the chain is linked in directly before the
// existing node. The previous three-way comparison matched none of its
// branches for an equal key and walked past the end of the chain,
// dereferencing a NULL pointer.
int link_insert(Node *node_first, int key)
{
        const bool was_empty = (node_first->next == NULL);

        Node *node = new Node;
        node->key = key;

        // Stop at the last node whose successor is missing or not smaller
        // than key; the new node belongs right after it.
        Node *prev = node_first;
        while (prev->next != NULL && prev->next->key < key) {
                prev = prev->next;
        }
        node->next = prev->next;
        prev->next = node;

        return was_empty ? 1 : 0;
}

// Put H into the empty state: zero key count, and every bucket head with
// key 0 and no chain. Prints a confirmation line and returns 0.
int hash_create(hash *H)
{
        H->key_num = 0;

        Node *const end = H->linknum + NUM;
        for (Node *bucket = H->linknum; bucket != end; ++bucket) {
                bucket->key = 0;
                bucket->next = NULL;
        }

        cout << "hash_create success." << endl;
        return 0;
}

// Store key in H: pick the bucket with hash_calculate(), link the key into
// that bucket's chain with link_insert(), and bump the key count.
// Prints a confirmation line and returns 0.
int hash_insert(hash *H, int key)
{
        Node *bucket_head = &H->linknum[hash_calculate(key)];

        link_insert(bucket_head, key);
        H->key_num += 1;

        cout << "hash_insert " << key << " success." << endl;
        return 0;
}

// Dump H to standard output: a header line with the key count, then one
// line per bucket listing the keys in its chain, then a closing rule.
// Returns 0.
int hash_print(hash *H)
{
        cout << endl << "----hash H have " << H->key_num << " elem----" << endl;

        for (int bucket = 0; bucket < NUM; ++bucket) {
                cout << bucket << " : ";
                // The bucket head carries no key; start at its successor.
                for (Node *cur = H->linknum[bucket].next; cur != NULL; cur = cur->next) {
                        cout << cur->key << " ";
                }
                cout << endl;
        }

        cout << "--------------------------" << endl;
        return 0;
}

// Look key up in H and report the result on standard output.
//
// When found, prints the location as (bucket index, 1-based position in
// the chain) and returns 0. When not found, prints a "no [ key ]" message
// and returns -1.
//
// The not-found case used to return 0 when the bucket was non-empty, which
// made it indistinguishable from a hit; it now returns -1 like the
// empty-bucket case.
int hash_search(hash *H, int key)
{
        int mod = key % NUM;
        if (mod < 0) {
                // % keeps the dividend's sign; keep the bucket index in range.
                mod += NUM;
        }

        int space = 0;
        for (Node *cur = H->linknum[mod].next; cur != NULL; cur = cur->next) {
                space++;
                if (cur->key == key) {
                        cout << "hash_search---[ " << key << " ] in (" << mod << ", " << space << ")" << endl;
                        return 0;
                }
        }

        cout << "hash_search---hashtable no [ " << key << " ]" << endl;
        return -1;
}

// Free every chain node of H and leave the table empty.
// Prints a confirmation line and returns 0.
//
// The successor pointer is saved before each node is deleted. The earlier
// loop advanced onto the node it was about to delete and then read that
// node's `next` in the loop condition, i.e. after it had been freed.
// Bucket heads and the key count are reset so that H does not keep
// pointers to freed nodes.
int hash_destroy(hash *H)
{
        for (int i = 0; i < NUM; i++) {
                Node *cur = H->linknum[i].next;
                while (cur != NULL) {
                        Node *following = cur->next;
                        delete cur;
                        cur = following;
                }
                H->linknum[i].next = NULL;
        }
        H->key_num = 0;

        cout << "hash_destroy success." << endl;

        return 0;
}

// Remove the first node holding key from its bucket in H.
//
// On success the node is freed, the key count is decremented, a
// confirmation line is printed and 0 is returned. When the key is not in
// the table a message is printed and -1 is returned.
int hash_delete(hash *H, int key)
{
        int mod = key % NUM;
        if (mod < 0) {
                // % keeps the dividend's sign; keep the bucket index in range.
                mod += NUM;
        }

        // Walk to the node directly in front of the one to remove.
        Node *prev = &H->linknum[mod];
        while (prev->next != NULL && prev->next->key != key) {
                prev = prev->next;
        }

        if (prev->next == NULL) {
                cout << "hashtable have no " << key << "." << endl;
                return -1;
        }

        Node *del = prev->next;
        prev->next = del->next;
        delete del;
        H->key_num--;

        cout << "hash_delete " << key << " success." << endl;
        return 0;
}

// Demo driver: fill a table, look up two keys, print it, delete a few keys
// (some of them absent), print it again and release it.
int main()
{
        // Keys in the order they are inserted.
        static const int insert_keys[] = {
                1, 2, 3, 4, 0, 100, 5, 200, 300, 800, 30, 40, 50, 1000, 80000,
                6, 56, 66, 76, 78, 86,
                48, 39, 19
        };
        // Keys in the order they are deleted; 886 and 67 were never inserted.
        static const int delete_keys[] = {100, 886, 0, 1, 67, 80000};

        const int insert_count = sizeof(insert_keys) / sizeof(insert_keys[0]);
        const int delete_count = sizeof(delete_keys) / sizeof(delete_keys[0]);

        hash H;
        hash_create(&H);

        for (int i = 0; i < insert_count; ++i) {
                hash_insert(&H, insert_keys[i]);
        }

        hash_search(&H, 100);
        hash_search(&H, 990);
        hash_print(&H);

        for (int i = 0; i < delete_count; ++i) {
                hash_delete(&H, delete_keys[i]);
        }

        hash_print(&H);
        hash_destroy(&H);
        return 0;
}

执行结果如下:

hash_create success.
hash_insert 1 success.
hash_insert 2 success.
hash_insert 3 success.
hash_insert 4 success.
hash_insert 0 success.
hash_insert 100 success.
hash_insert 5 success.
hash_insert 200 success.
hash_insert 300 success.
hash_insert 800 success.
hash_insert 30 success.
hash_insert 40 success.
hash_insert 50 success.
hash_insert 1000 success.
hash_insert 80000 success.
hash_insert 6 success.
hash_insert 56 success.
hash_insert 66 success.
hash_insert 76 success.
hash_insert 78 success.
hash_insert 86 success.
hash_insert 48 success.
hash_insert 39 success.
hash_insert 19 success.
hash_search---[ 100 ] in (10, 2)
hash_search---hashtable no [ 990 ]

----hash H have 24 elem----
0 : 0 30 300 
1 : 1 76 
2 : 2 
3 : 3 48 78 
4 : 4 19 
5 : 5 50 200 800 80000 
6 : 6 66 
7 : 
8 : 
9 : 39 
10 : 40 100 1000 
11 : 56 86 
12 : 
13 : 
14 : 
--------------------------
hash_delete 100 success.
hashtable have no 886.
hash_delete 0 success.
hash_delete 1 success.
hashtable have no 67.
hash_delete 80000 success.

----hash H have 20 elem----
0 : 30 300 
1 : 76 
2 : 2 
3 : 3 48 78 
4 : 4 19 
5 : 5 50 200 800 
6 : 6 66 
7 : 
8 : 
9 : 39 
10 : 40 1000 
11 : 56 86 
12 : 
13 : 
14 : 
--------------------------
hash_destroy success.

猜你喜欢

转载自blog.csdn.net/qq_58550520/article/details/128942031