哈希与拉链法

1、哈希的引入和简单使用

我们看下面一个关于查找的例子：

#include <stdio.h>

/*
 * Linear search: report whether `key` occurs among the first `n`
 * elements of `a`.
 *
 * Returns 1 if some a[i] equals key, 0 otherwise (including n <= 0).
 */
int find_key(int a[], int n, int key)
{
    int idx = 0;

    while (idx < n) {
        if (a[idx] == key) {
            return 1;
        }
        ++idx;
    }
    return 0;
}

/*
 * Demo driver: for each value 1..10, report whether it occurs in the
 * sample array using the linear-search find_key().
 */
int main()
{
    int a[] = {7, 17, 5, 8, 2, 9, 20, 3, 2, 5};
    int n = (int)(sizeof a / sizeof a[0]);

    // Check whether each of 1..10 is present in a.
    for (int i = 1; i <= 10; i++) {
        // Every %d needs a matching argument; the newline keeps one
        // result per line.
        if (find_key(a, n, i)) {
            printf("%d is in array\n", i);
        } else {
            printf("%d is not in array\n", i);
        }
    }
    return 0;
}

查询一个元素的时间复杂度为O(n)；如果查询次数也为n，那么整体的时间复杂度就是O(n^2)；

而通过哈希表就可以将整体的效率优化到线性的O(n)；

看一个哈希表的特例情况：假如数据的范围是0-99并且类型是整型，这种情况可以直接使用数组下标来记录元素是否出现。这就是最简单的哈希思想。

哈希查找：

#include <stdio.h>
#define MAX_TABLE_LEN 100  // maximum length of the hash table

/*
 * Build a direct-address table from an array.
 *
 * For each of the first `n` values v in `a`, increments table[v], so
 * table[v] ends up counting how often v occurs. `table` must have at
 * least MAX_TABLE_LEN entries and should be zero-initialised by the
 * caller; this function only adds to the existing counters.
 *
 * Values outside [0, MAX_TABLE_LEN) have no slot in the table and are
 * skipped instead of being written out of bounds.
 */
void create_hash(int a[], int n, int table[])
{
    for (int i = 0; i < n; i++) {
        int value = a[i];

        if (value < 0 || value >= MAX_TABLE_LEN) {
            continue;
        }
        table[value]++;
    }
}

/*
 * Direct-address lookup: returns 1 when table[key] is non-zero, 0 otherwise.
 * `key` is used directly as the index; no range check is performed here.
 */
int find_key(int table[], int key)
{
    if (table[key] == 0) {
        return 0;
    }
    return 1;
}

/*
 * Demo driver: build the direct-address table for the sample array,
 * list every value that occurs together with its count, then look up
 * the keys 0..10.
 */
int main()
{
    int a[] = {7, 17, 5, 8, 2, 9, 20, 3, 2, 5};
    // Table of MAX_TABLE_LEN counters, all starting at 0.
    int table[MAX_TABLE_LEN] = {0};

    create_hash(a, (int)(sizeof a / sizeof a[0]), table);
    printf("in hash table:\n");

    for (int i = 0; i < MAX_TABLE_LEN; i++) {
        if (table[i] > 0) {
            printf("%d appear %d times.\n", i, table[i]);
        }
    }

    printf("test\n");
    for (int i = 0; i <= 10; i++) {
        if (find_key(table, i)) {
            printf("%d is in array.\n", i);
        } else {
            printf("%d is not in array.\n", i);
        }
    }
    return 0;
}

我们通过上述的方法不仅可以用来查找，还可以用来排序。假设数组a当中有n个元素，n很大，但是a中元素的范围非常小，比如0-99，那么可以用一个长度为100的table记录a中每个元素出现的次数。由于table[i]代表了数据 i 出现的次数，在排序的时候，让i从0循环到MAX_TABLE_LEN-1，再依次将table[i]个i写回到a中就可以了。

计数排序（运用hash的思想）

#include <stdio.h>
#define MAX_TABLE_LEN 100  // maximum length of the hash table

/*
 * Counting sort, in place, for the first `n` elements of `a`.
 *
 * Precondition: every element lies in [0, MAX_TABLE_LEN). Each value is
 * used as an index into a local count table, so an element outside that
 * range has no slot. If one is found, the function returns with `a`
 * left exactly as it was rather than indexing out of bounds.
 *
 * Runs in O(n + MAX_TABLE_LEN) time.
 */
void sort(int a[], int n)
{
    int table[MAX_TABLE_LEN] = {0};

    // Counting pass. It only reads `a`, so bailing out here leaves the
    // caller's data unmodified.
    for (int i = 0; i < n; i++) {
        if (a[i] < 0 || a[i] >= MAX_TABLE_LEN) {
            return;
        }
        table[a[i]]++;
    }

    // table[value] is how many times `value` occurred; write that many
    // copies back in ascending order.
    int k = 0;
    for (int value = 0; value < MAX_TABLE_LEN; value++) {
        for (int j = 0; j < table[value]; j++) {
            a[k++] = value;
        }
    }
}

/*
 * Demo driver: sort the sample array with the counting sort above and
 * print the result on one line.
 */
int main()
{
    int a[] = {7, 17, 5, 8, 2, 9, 20, 3, 2, 5};
    int n = (int)(sizeof a / sizeof a[0]);

    sort(a, n);
    for (int i = 0; i < n; i++) {
        // A separator is required: without it "17" followed by "20"
        // prints as "1720" and the values cannot be told apart.
        printf("%d ", a[i]);
    }
    printf("\n");
    return 0;
}

当n远大于表长的时候，计数排序算法的时间复杂度就是O(n)，优于一般比较排序算法O(nlogn)的平均时间复杂度。

2、哈希表与哈希冲突

哈希表也称为散列表（hash table），它是可以根据关键字的值直接进行查询和访问的数据结构。我们通常通过一个映射函数将关键字直接对应到表中的某个位置，从而加快查找速度，这个映射函数就是哈希函数，存放记录的数组就叫做哈希表，例如下面的例子：

哈希函数：H(key) = key % 12

如上图，哈希函数是对12取余，在哈希表中a[0]中可能存有48、12等多个数值，即哈希函数可能会把两个或两个以上的不同关键字映射到同一个地址，这时就会产生哈希冲突。

解决哈希冲突的几种方法：

1）线性探测法，冲突发生，顺序查看表中的下一个元素，直到找出一个空闲单元

2）平方探测法：设发生冲突的地址为d，平方探测法得到的新的地址序列为：d+1^2，d-1^2，d+2^2，d-2^2，……

3）拉链法

对于不同的关键字可能会通过哈希函数映射到同一地址，为了避免非同义词发生冲突，可以将所有的同义词存储到一个线性链表中，这个线性链表由散列地址唯一标识。拉链法适用于经常进行插入和删除的情况。上图所表示的就是拉链法。

一些概念：

装填因子：哈希表用装填因子α表示一个表的装满程度，α = 表中记录数n / 哈希表长度m。

3、拉链法

下面通过一个demo来演示拉链法的实现：

Hashlink.h

#include <iostream>
#include <cmath>
#include <stdlib.h>
#include <math.h>

using namespace std;

// Number of buckets in the hash table (size of hash::linknum); also the
// modulus used by hash_calculate().
#define NUM 15

// File-scope counter initialised to 0. None of the functions shown in this
// demo reference it; the table's element count is kept in hash::key_num.
// NOTE(review): this is a variable definition inside a header, so including
// the header from more than one .cpp would presumably cause duplicate
// definitions at link time — confirm and consider removing it.
int key_num = 0;

// Singly linked list node holding one key.
// `linklist` is declared as an alias for Node*.
typedef struct Node {
        int key;        // stored key
        Node *next;     // next node in the chain, or NULL at the end
} Node, *linklist;

// Chained hash table: NUM buckets, each a linked list whose head Node is
// embedded in the array. The embedded head acts as a dummy node: the
// traversal code starts at head.next, so the head's own `key` is not data.
// NOTE(review): with `using namespace std;` in effect, the unqualified name
// `hash` may be ambiguous with std::hash on toolchains whose standard headers
// declare it transitively — confirm on the target compiler.
typedef struct hash{
        Node linknum[NUM];      // bucket heads (dummy nodes)
        int key_num;            // number of keys currently stored
} hash;

// Initialise H: clears every bucket head and sets key_num to 0. Returns 0.
int hash_create(hash *H);

// Insert `key` into the bucket chosen by hash_calculate() (via link_insert())
// and increment key_num. Returns 0.
int hash_insert(hash *H, int key);

// Map a key to a bucket index, based on key % NUM.
int hash_calculate(int key);

// Allocate a node for `key` and link it into the chain headed by the dummy
// node `node_first`; the implementation aims to keep the chain in ascending
// key order.
int link_insert(Node *node_first, int key);

// Unlink and free the first node holding `key`, decrementing key_num.
// Returns 0 on success, -1 if the key is not in the table.
int hash_delete(hash *H, int key);                                                                   

// Look `key` up in its bucket and print either its (bucket, position) or a
// not-found message.
int hash_search(hash *H, int key);

// Print the element count followed by the keys chained in each bucket.
// Returns 0.
int hash_print(hash *H);

// Free the nodes chained from every bucket. Returns 0.
int hash_destroy(hash *H);

Hashlink.cpp

#include "Hashlink.h"

// Map a key to its bucket index in [0, NUM).
//
// In C++ the result of `key % NUM` takes the sign of `key`, so a negative key
// would produce a negative remainder and index before the start of the bucket
// array. The remainder is shifted into range so every int key gets a valid
// bucket; non-negative keys map exactly as key % NUM.
int hash_calculate(int key)
{
        int bucket = key % NUM;
        if (bucket < 0) {
                bucket += NUM;
        }
        return bucket;
}

// Insert `key` into the chain headed by the dummy node `node_first`, keeping
// the chain in ascending key order. Duplicate keys are allowed; a new
// duplicate is placed after the existing equal keys.
//
// Returns 1 if the chain was empty before the insert, 0 otherwise.
int link_insert(Node *node_first, int key)
{
        Node *node = new Node;
        node->key = key;
        node->next = NULL;

        const bool was_empty = (node_first->next == NULL);

        // Advance to the last node whose successor is either missing or holds
        // a larger key. Testing for NULL on every step keeps the walk inside
        // the list even when `key` equals or exceeds the current tail key.
        Node *prev = node_first;
        while (prev->next != NULL && prev->next->key <= key) {
                prev = prev->next;
        }

        node->next = prev->next;
        prev->next = node;

        return was_empty ? 1 : 0;
}

// Put the table into its empty state: element count 0 and every bucket head
// reset to key 0 with no successor. Prints a confirmation line and returns 0.
int hash_create(hash *H)
{
        H->key_num = 0;

        Node *const end = H->linknum + NUM;
        for (Node *head = H->linknum; head != end; ++head) {
                head->next = NULL;
                head->key = 0;
        }

        cout << "hash_create success." << endl;
        return 0;
}

// Add `key` to the table: pick the bucket with hash_calculate(), link the key
// into that bucket's chain with link_insert(), and bump the element count.
// Prints a confirmation line and always returns 0.
int hash_insert(hash *H, int key)
{
        Node *bucket_head = &H->linknum[hash_calculate(key)];

        link_insert(bucket_head, key);
        ++H->key_num;

        cout << "hash_insert " << key << " success." << endl;
        return 0;
}

// Dump the table to stdout: a header line with the element count, then one
// line per bucket listing the keys chained in it, then a footer. Returns 0.
int hash_print(hash *H)
{
        cout << endl << "----hash H have " << H->key_num << " elem----" << endl;

        int bucket = 0;
        while (bucket < NUM) {
                cout << bucket << " : ";
                // The embedded head is a dummy; real keys start at head.next.
                for (Node *cur = H->linknum[bucket].next; cur != NULL; cur = cur->next) {
                        cout << cur->key << " ";
                }
                cout << endl;
                ++bucket;
        }

        cout << "--------------------------" << endl;
        return 0;
}

// Look `key` up and print the result.
//
// The bucket comes from hash_calculate(), the same function hash_insert()
// uses, so lookups always inspect the chain the key was inserted into.
//
// On a hit, prints "(bucket, position)" where position is the 1-based index
// of the node within its chain, and returns 0. On a miss — whether the bucket
// is empty or the chain simply does not contain the key — prints a not-found
// message and returns -1.
int hash_search(hash *H, int key)
{
        int mod = hash_calculate(key);
        int space = 0;

        for (Node *cur = H->linknum[mod].next; cur != NULL; cur = cur->next) {
                space++;
                if (cur->key == key) {
                        cout << "hash_search---[ " << key << " ] in (" << mod << ", " << space << ")" << endl;
                        return 0;
                }
        }

        cout << "hash_search---hashtable no [ " << key << " ]" << endl;
        return -1;
}

// Free every node chained from every bucket and return the table to its
// empty state (all head links NULL, key_num 0), so the table can be reused
// or destroyed again without touching freed memory.
// Prints a confirmation line and returns 0.
int hash_destroy(hash *H)
{
        for (int i = 0; i < NUM; i++) {
                Node *cur = H->linknum[i].next;
                while (cur != NULL) {
                        // Read the link before the node is freed; it must
                        // not be accessed after delete.
                        Node *following = cur->next;
                        delete cur;
                        cur = following;
                }
                H->linknum[i].next = NULL;
        }
        H->key_num = 0;

        cout << "hash_destroy success." << endl;
        return 0;
}

// Remove the first node holding `key` from the table.
//
// The bucket comes from hash_calculate(), the same function hash_insert()
// uses, so deletion always searches the chain the key was inserted into.
//
// Returns 0 after unlinking and freeing the node and decrementing key_num;
// prints a message and returns -1 if the key is not present.
int hash_delete(hash *H, int key)
{
        // `prev` trails one node behind the candidate so it can be unlinked.
        Node *prev = &H->linknum[hash_calculate(key)];

        while (prev->next != NULL && prev->next->key != key) {
                prev = prev->next;
        }

        if (prev->next == NULL) {
                cout << "hashtable have no " << key << "." << endl;
                return -1;
        }

        Node *del = prev->next;
        prev->next = del->next;
        delete del;
        H->key_num--;

        cout << "hash_delete " << key << " success." << endl;
        return 0;
}

// Demo driver: fill the table with a fixed key sequence, run two lookups,
// print the table, delete a mix of present and absent keys, print again,
// then release all nodes.
int main()
{
        // Keys in the exact order they are inserted.
        static const int insert_keys[] = {
                1, 2, 3, 4, 0, 100, 5, 200, 300, 800, 30, 40, 50, 1000, 80000,
                6, 56, 66, 76, 78, 86, 48, 39, 19
        };
        static const int search_keys[] = {100, 990};
        static const int delete_keys[] = {100, 886, 0, 1, 67, 80000};

        const int insert_count = sizeof insert_keys / sizeof insert_keys[0];
        const int search_count = sizeof search_keys / sizeof search_keys[0];
        const int delete_count = sizeof delete_keys / sizeof delete_keys[0];

        hash H;
        hash_create(&H);

        for (int i = 0; i < insert_count; ++i) {
                hash_insert(&H, insert_keys[i]);
        }
        for (int i = 0; i < search_count; ++i) {
                hash_search(&H, search_keys[i]);
        }
        hash_print(&H);

        for (int i = 0; i < delete_count; ++i) {
                hash_delete(&H, delete_keys[i]);
        }
        hash_print(&H);

        hash_destroy(&H);
        return 0;
}

执行结果如下：

hash_create success.
hash_insert 1 success.
hash_insert 2 success.
hash_insert 3 success.
hash_insert 4 success.
hash_insert 0 success.
hash_insert 100 success.
hash_insert 5 success.
hash_insert 200 success.
hash_insert 300 success.
hash_insert 800 success.
hash_insert 30 success.
hash_insert 40 success.
hash_insert 50 success.
hash_insert 1000 success.
hash_insert 80000 success.
hash_insert 6 success.
hash_insert 56 success.
hash_insert 66 success.
hash_insert 76 success.
hash_insert 78 success.
hash_insert 86 success.
hash_insert 48 success.
hash_insert 39 success.
hash_insert 19 success.
hash_search---[ 100 ] in (10, 2)
hash_search---hashtable no [ 990 ]

----hash H have 24 elem----
0 : 0 30 300 
1 : 1 76 
2 : 2 
3 : 3 48 78 
4 : 4 19 
5 : 5 50 200 800 80000 
6 : 6 66 
7 : 
8 : 
9 : 39 
10 : 40 100 1000 
11 : 56 86 
12 : 
13 : 
14 : 
--------------------------
hash_delete 100 success.
hashtable have no 886.
hash_delete 0 success.
hash_delete 1 success.
hashtable have no 67.
hash_delete 80000 success.

----hash H have 20 elem----
0 : 30 300 
1 : 76 
2 : 2 
3 : 3 48 78 
4 : 4 19 
5 : 5 50 200 800 
6 : 6 66 
7 : 
8 : 
9 : 39 
10 : 40 1000 
11 : 56 86 
12 : 
13 : 
14 : 
--------------------------
hash_destroy success.

1、哈希的引入和简单使用

2、哈希表与哈希冲突

3、拉链法

猜你喜欢