dict.h


/* Status codes returned by the dict functions */
#define DICT_OK 0
#define DICT_ERR 1

/* A single stored record: one key/value pair. Records that land in the
 * same bucket are linked together through 'next'. */
typedef struct dictEntry {
    void *key;
    void *val;
    struct dictEntry *next;
} dictEntry;

/* The hash table itself */
typedef struct dict {
    /* Bucket array; every slot heads a linked list of entries
     * (array + separate chaining) */
    dictEntry **table;
    /* Set of functions the table uses to operate on its keys and values */
    dictType *type;
    /* Number of buckets in 'table'; kept a power of two */
    unsigned long size;
    /* sizemask = size-1, so that (hash & sizemask) reduces a hash value
     * to a bucket index */
    unsigned long sizemask;
    /* Number of entries currently stored in the table */
    unsigned long used;
    /* Caller-supplied pointer stored as-is by _dictInit() */
    void *privdata;
} dict;

/* Iterator state used while walking all the entries of a hash table.
 * 'index' is the bucket currently being visited (dictGetIterator() starts
 * it at -1), 'entry' is the entry last returned and 'nextEntry' is its
 * successor, saved in advance by dictNext(). */
typedef struct dictIterator {
    dict *ht;
    int index;
    dictEntry *entry, *nextEntry;
} dictIterator;

/* This is the initial size (number of buckets) of every hash table */
#define DICT_HT_INITIAL_SIZE     4

dict.c

#include "fmacros.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <assert.h>
#include <limits.h>

#include "dict.h"
#include "zmalloc.h"

/* ---------------------------- Utility functions --------------------------- */

/**
 * Print a library panic message on stderr.
 * 'fmt' and the variadic arguments that follow are handed to vfprintf(),
 * so they are interpreted printf-style. The function only prints; it then
 * returns to the caller.
 */
static void _dictPanic(const char *fmt, ...)
{
    va_list args;

    fputs("\nDICT LIBRARY PANIC: ", stderr);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputs("\n\n", stderr);
}

/* ------------------------- Heap Management Wrappers------------------------ */
/**
 * Heap allocation wrapper.
 * Requests 'size' bytes from zmalloc(). When zmalloc() yields NULL a panic
 * message is printed, and NULL is still handed back to the caller.
 */
static void *_dictAlloc(size_t size)
{
    void *mem = zmalloc(size);

    if (!mem) {
        _dictPanic("Out of memory");
    }
    return mem;
}

/**
 * Heap release wrapper: hands 'ptr' to zfree().
 */
static void _dictFree(void *ptr) {
    zfree(ptr);
}

/* -------------------------- private prototypes ---------------------------- */

/* Helpers that are used before their definitions further down in this file */
static int _dictExpandIfNeeded(dict *ht);
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);

/* -------------------------- hash functions -------------------------------- */

/* Thomas Wang's 32 bit Mix Function */
/**
 * Scrambles an integer key through a fixed sequence of shift, add and xor
 * steps and returns the mixed value, to spread keys more evenly.
 */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int mixed = key;

    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed + (mixed << 3);
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);
    return mixed;
}

/**
 * Uses the integer key itself, unchanged, as the hash value.
 */
/* Identity hash function for integer keys */
unsigned int dictIdentityHashFunction(unsigned int key)
{
    return key;
}

/* Generic hash function (a popular one from Bernstein).
 * I tested a few and this was the best.
 *
 * Hashes the first 'len' bytes of 'buf' as hash = hash * 33 + byte,
 * starting from the seed 5381, and returns the result.
 * A 'len' of zero or less hashes nothing: the seed 5381 is returned and
 * 'buf' is not read. */
unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = 5381;

    /* Test 'len > 0' rather than 'len--': a negative length must not send
     * the loop reading past the end of the buffer. */
    for (; len > 0; len--)
        hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
    return hash;
}

/* ----------------------------- API implementation ------------------------- */

/* Reset an hashtable already initialized with ht_init().
 * NOTE: This function should only called by ht_destroy(). */
/**
 * Puts the bucket bookkeeping back into the "empty" state: no bucket
 * array, and size, sizemask and used all zero. Nothing is freed here, and
 * 'type' and 'privdata' are left untouched.
 */
static void _dictReset(dict *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}

/* Create a new hash table */
/**
 * 'type' is the set of functions the table will use and 'privDataPtr' is
 * stored as the table's private data. No bucket array is allocated here;
 * the new table starts out empty.
 *
 * Returns the new dict, or NULL when the dict structure itself could not
 * be allocated.
 */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *ht = _dictAlloc(sizeof(*ht));

    /* _dictAlloc() reports the failure but still returns NULL: do not
     * initialize through a NULL pointer. */
    if (ht == NULL)
        return NULL;
    _dictInit(ht,type,privDataPtr);
    return ht;
}

/* Initialize the hash table */
/**
 * Records the function set ('type') and the private data pointer in 'ht'
 * and resets the bucket bookkeeping to the empty state.
 * Always returns DICT_OK.
 */
int _dictInit(dict *ht, dictType *type,
        void *privDataPtr)
{
    /* _dictReset() does not touch these two fields, so the order of the
     * assignments relative to the reset does not matter. */
    ht->privdata = privDataPtr;
    ht->type = type;
    _dictReset(ht);
    return DICT_OK;
}

/* Resize the table to the minimal size that contains all the elements,
 * but with the invariant of a USER/BUCKETS ration near to <= 1 */

/**
 * The requested size is the current number of entries, but never less
 * than DICT_HT_INITIAL_SIZE (4), which is therefore the smallest table
 * size. Returns whatever dictExpand() returns.
 */
int dictResize(dict *ht)
{
    /* Same type as ht->used: storing the count in an 'int' would truncate
     * it (possibly to a negative value) for very large tables. */
    unsigned long minimal = ht->used;

    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    return dictExpand(ht, minimal);
}

/* Expand or create the hashtable */
/**
 * Rebuilds 'ht' with a bucket array able to hold 'size' entries; the real
 * bucket count is 'size' rounded by _dictNextPower(). Every existing entry
 * is re-linked (not copied) into the new bucket array and the old array is
 * released.
 *
 * Returns DICT_OK on success. Returns DICT_ERR, leaving 'ht' untouched,
 * when 'size' is smaller than the number of stored entries, when the
 * bucket array size in bytes would overflow, or when the bucket array
 * cannot be allocated.
 */
int dictExpand(dict *ht, unsigned long size)
{
    dict n; /* the new hashtable */
    unsigned long realsize = _dictNextPower(size);
    unsigned long i; /* same type as ht->size, which it is compared with */

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hashtable */
    if (ht->used > size)
        return DICT_ERR;

    /* Refuse bucket counts whose size in bytes does not fit in a size_t */
    if (realsize > ((size_t)-1) / sizeof(dictEntry*))
        return DICT_ERR;

    _dictInit(&n, ht->type, ht->privdata);
    n.size = realsize;
    /* The mask is all ones below the bucket count, so that
     * (hash & sizemask) selects a bucket. */
    n.sizemask = realsize-1;
    n.table = _dictAlloc(realsize*sizeof(dictEntry*));
    /* _dictAlloc() returns NULL on failure: bail out before touching it */
    if (n.table == NULL)
        return DICT_ERR;
    /* Initialize all the pointers to NULL */
    memset(n.table, 0, realsize*sizeof(dictEntry*));

    /* Copy all the elements from the old to the new table:
     * note that if the old hash table is empty ht->size is zero,
     * so dictExpand just creates an hash table.
     * */
    n.used = ht->used;
    for (i = 0; i < ht->size && ht->used > 0; i++) {
        dictEntry *he, *nextHe;

        if (ht->table[i] == NULL) continue;

        /* For each hash entry on this slot... */
        he = ht->table[i];
        while(he) {
            unsigned int h;

            /* Save the successor: he->next is overwritten below */
            nextHe = he->next;
            /* Get the new element index */
            h = dictHashKey(ht, he->key) & n.sizemask;
            /* Insert at the head of the destination bucket */
            he->next = n.table[h];
            n.table[h] = he;
            ht->used--;
            /* Pass to the next element */
            he = nextHe;
        }
    }
    /* Every entry must have been moved out of the old table */
    assert(ht->used == 0);
    _dictFree(ht->table);

    /* Remap the new hashtable in the old */
    *ht = n;
    return DICT_OK;
}

/* Add an element to the target hash table */
/**
 * Inserts a new entry with key 'key' and value 'val'.
 *
 * Returns DICT_OK when the entry was added. Returns DICT_ERR when
 * _dictKeyIndex() reports -1 (the key is already present, or the table
 * could not be expanded) or when the entry cannot be allocated; the table
 * is left unchanged in those cases.
 */
int dictAdd(dict *ht, void *key, void *val)
{
    int index;
    dictEntry *entry;

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    if ((index = _dictKeyIndex(ht, key)) == -1)
        return DICT_ERR;

    /* Allocates the memory and stores key */
    entry = _dictAlloc(sizeof(*entry));
    /* _dictAlloc() returns NULL on failure: do not link a NULL entry */
    if (entry == NULL)
        return DICT_ERR;
    /* Insert at the head of the bucket's list */
    entry->next = ht->table[index];
    ht->table[index] = entry;

    /* Set the hash entry fields. */
    dictSetHashKey(ht, entry, key);
    dictSetHashVal(ht, entry, val);
    ht->used++;
    return DICT_OK;
}

/* Add an element, discarding the old if the key already exists */
/**
 * Tries dictAdd() first; when that fails the existing entry for 'key' is
 * looked up and its value is replaced by 'val'.
 *
 * Returns DICT_OK when the element was added or its value replaced, and
 * DICT_ERR when dictAdd() failed and no entry for 'key' exists.
 */
int dictReplace(dict *ht, void *key, void *val)
{
    dictEntry *entry;
    dictEntry saved;            /* copy of the entry holding the old value */
    dictEntry *old = &saved;

    /* Try to add the element. If the key
     * does not exists dictAdd will suceed. */
    if (dictAdd(ht, key, val) == DICT_OK)
        return DICT_OK;
    /* It already exists, get the entry */
    entry = dictFind(ht, key);
    /* dictAdd() can also fail for reasons other than a duplicate key (its
     * index lookup returns -1 when expansion fails too), so the key may be
     * absent here. */
    if (entry == NULL)
        return DICT_ERR;
    /* Set the new value first and free the old one afterwards: the new
     * value may be the very same object as the old one, and it must not be
     * released before it has been stored (or duplicated). */
    saved = *entry;
    dictSetHashVal(ht, entry, val);
    dictFreeEntryVal(ht, old);
    return DICT_OK;
}

/**
 * Removes the entry for 'key' from the hash table.
 * When 'nofree' is zero the key and value free macros are applied to the
 * entry before the entry structure is released; when it is non-zero only
 * the entry structure is released.
 * Returns DICT_OK if an entry was removed, DICT_ERR if the table has no
 * buckets or the key was not found.
 */
/* Search and remove an element */
static int dictGenericDelete(dict *ht, const void *key, int nofree)
{
    unsigned int slot;
    dictEntry **link;   /* address of the pointer that leads to 'cur' */
    dictEntry *cur;

    if (ht->size == 0)
        return DICT_ERR;

    /* Bucket the key hashes to */
    slot = dictHashKey(ht, key) & ht->sizemask;

    for (link = &ht->table[slot]; (cur = *link) != NULL; link = &cur->next) {
        if (dictCompareHashKeys(ht, key, cur->key)) {
            /* Unlink the element from the list */
            *link = cur->next;
            if (!nofree) {
                dictFreeEntryKey(ht, cur);
                dictFreeEntryVal(ht, cur);
            }
            _dictFree(cur);
            ht->used--;
            return DICT_OK;
        }
    }
    return DICT_ERR; /* not found */
}

/**
 * Removes the entry for 'key', applying the key and value free macros to
 * it (dictGenericDelete() with nofree = 0).
 * Returns DICT_OK if the entry was removed, DICT_ERR otherwise.
 */
int dictDelete(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,0);
}

/**
 * Removes the entry for 'key' from the hash table, but does not free
 * the key or the value it held (dictGenericDelete() with nofree = 1).
 * Returns DICT_OK if the entry was removed, DICT_ERR otherwise.
 */
int dictDeleteNoFree(dict *ht, const void *key) {
    return dictGenericDelete(ht,key,1);
}

/* Destroy an entire hash table */
/**
 * Frees every entry (key, value and entry structure) and the bucket array,
 * then resets the table to the empty state. The dict structure itself is
 * kept. Always returns DICT_OK.
 */
int _dictClear(dict *ht)
{
    unsigned long slot;

    /* Free all the elements; stop early once no entry is left */
    for (slot = 0; slot < ht->size && ht->used > 0; slot++) {
        dictEntry *cur = ht->table[slot];

        /* Walk the bucket's list; an empty bucket skips the loop */
        while (cur != NULL) {
            /* Read the successor before 'cur' is released */
            dictEntry *following = cur->next;

            dictFreeEntryKey(ht, cur);
            dictFreeEntryVal(ht, cur);
            _dictFree(cur);
            ht->used--;
            cur = following;
        }
    }
    /* Free the table and the allocated cache structure */
    _dictFree(ht->table);
    /* Re-initialize the table */
    _dictReset(ht);
    return DICT_OK; /* never fails */
}

/* Clear & Release the hash table */
/**
 * Releases the hash table completely: the stored entries and the bucket
 * array first, then the dict structure itself.
 */
void dictRelease(dict *ht)
{
    /* Drop all the data held by the table */
    _dictClear(ht);
    /* Release the dict structure */
    _dictFree(ht);
}

/**
 * Looks up 'key' and returns its dictEntry, or NULL when the table has no
 * buckets or no entry in the key's bucket compares equal to 'key'.
 */
dictEntry *dictFind(dict *ht, const void *key)
{
    dictEntry *cur;
    unsigned int slot;

    /* A table without buckets cannot contain anything */
    if (ht->size == 0)
        return NULL;

    slot = dictHashKey(ht, key) & ht->sizemask;
    for (cur = ht->table[slot]; cur != NULL; cur = cur->next) {
        if (dictCompareHashKeys(ht, key, cur->key))
            return cur;
    }
    return NULL;
}

/**
 * Allocates an iterator over 'ht', positioned before the first bucket
 * (index -1, no current entry).
 * Returns the iterator, or NULL when it cannot be allocated. The iterator
 * is released with dictReleaseIterator().
 */
dictIterator *dictGetIterator(dict *ht)
{
    dictIterator *iter = _dictAlloc(sizeof(*iter));

    /* _dictAlloc() returns NULL on failure: nothing to initialize */
    if (iter == NULL)
        return NULL;
    iter->ht = ht;
    iter->index = -1;
    iter->entry = NULL;
    iter->nextEntry = NULL;
    return iter;
}

/**
 * Advances the iterator and returns the next entry of the hash table, or
 * NULL once the bucket index has run past the last bucket.
 */
dictEntry *dictNext(dictIterator *iter)
{
    for (;;) {
        if (iter->entry != NULL) {
            /* Continue along the current bucket's list */
            iter->entry = iter->nextEntry;
        } else {
            /* Current list exhausted (or iteration not started yet):
             * move on to the next bucket */
            iter->index++;
            if (iter->index >=
                    (signed)iter->ht->size) return NULL;
            iter->entry = iter->ht->table[iter->index];
        }
        if (iter->entry == NULL)
            continue;
        /* We need to save the 'next' here, the iterator user
         * may delete the entry we are returning. */
        iter->nextEntry = iter->entry->next;
        return iter->entry;
    }
}

/**
 * Releases the memory occupied by the iterator.
 */
void dictReleaseIterator(dictIterator *iter)
{
    _dictFree(iter);
}

/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms */
/**
 * Returns NULL when the table holds no entries. Otherwise a random
 * non-empty bucket is chosen first, then a random element of that
 * bucket's list.
 */
dictEntry *dictGetRandomKey(dict *ht)
{
    dictEntry *cur;
    unsigned int slot;
    int chainlen, pick;

    if (ht->used == 0) return NULL;

    /* Keep drawing bucket indexes until a non-empty bucket comes up */
    do {
        slot = random() & ht->sizemask;
        cur = ht->table[slot];
    } while (cur == NULL);

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is to count the element and
     * select a random index. */
    for (chainlen = 0; cur != NULL; cur = cur->next)
        chainlen++;
    pick = random() % chainlen;

    /* Step 'pick' links forward from the head of the bucket */
    for (cur = ht->table[slot]; pick > 0; pick--)
        cur = cur->next;
    return cur;
}

/* ------------------------- private functions ------------------------------ */

/* Expand the hash table if needed */
/**
 * If the hash table is empty it is expanded to the initial size
 * (DICT_HT_INITIAL_SIZE); if the number of entries equals the number of
 * buckets the size is doubled. Otherwise nothing is done.
 * Returns the result of dictExpand() when an expansion was attempted,
 * DICT_OK otherwise.
 */
static int _dictExpandIfNeeded(dict *ht)
{
    /* No bucket array yet: create the initial one */
    if (ht->size == 0)
        return dictExpand(ht, DICT_HT_INITIAL_SIZE);
    /* Still room according to the used == size rule */
    if (ht->used != ht->size)
        return DICT_OK;
    /* The table is "full": double its size */
    return dictExpand(ht, ht->size*2);
}

/* Our hash table capability is a power of two */
/**
 * Starting from DICT_HT_INITIAL_SIZE, doubles until the value is at least
 * 'size' and returns it. A 'size' of LONG_MAX or more short-circuits to
 * LONG_MAX.
 */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long pow2;

    if (size >= LONG_MAX) return LONG_MAX;
    for (pow2 = DICT_HT_INITIAL_SIZE; pow2 < size; pow2 *= 2)
        ; /* keep doubling */
    return pow2;
}

/* Returns the index of a free slot that can be populated with
 * an hash entry for the given 'key'.
 * If the key already exists, -1 is returned. */
/**
 * -1 is also returned when the table needed to be expanded and the
 * expansion failed.
 */
static int _dictKeyIndex(dict *ht, const void *key)
{
    unsigned int slot;
    dictEntry *cur;

    /* Expand the hashtable if needed */
    if (_dictExpandIfNeeded(ht) == DICT_ERR)
        return -1;

    /* Compute the key hash value */
    slot = dictHashKey(ht, key) & ht->sizemask;

    /* Search if this slot does not already contain the given key */
    for (cur = ht->table[slot]; cur != NULL; cur = cur->next) {
        if (dictCompareHashKeys(ht, key, cur->key))
            return -1;
    }
    return slot;
}

/**
 * Empties the hash table: all entries and the bucket array are freed and
 * the table is reset to its empty state (see _dictClear()).
 */
void dictEmpty(dict *ht) {
    _dictClear(ht);
}

/**
 * Prints statistics about the current state of the hash table on stdout:
 * table size, number of elements, number of non-empty buckets, longest
 * chain, average chain length, and the distribution of chain lengths.
 *
 * clvector[n] counts the buckets whose chain has length n (clvector[0]
 * counts the empty buckets); chains of DICT_STATS_VECTLEN-1 elements or
 * more are all counted in the last cell.
 */
#define DICT_STATS_VECTLEN 50
void dictPrintStats(dict *ht) {
    unsigned long i, slots = 0, chainlen, maxchainlen = 0;
    unsigned long totchainlen = 0;
    unsigned long clvector[DICT_STATS_VECTLEN];

    /* Nothing to report for a table without entries. This also guarantees
     * 'slots' is non-zero when used as a divisor below. */
    if (ht->used == 0) {
        printf("No stats available for empty dictionaries\n");
        return;
    }
    for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
    for (i = 0; i < ht->size; i++) {
        dictEntry *he;

        if (ht->table[i] == NULL) {
            /* clvector[0] counts the empty buckets */
            clvector[0]++;
            continue;
        }
        slots++;
        /* For each hash entry on this slot... */
        chainlen = 0;
        he = ht->table[i];
        while(he) {
            chainlen++;
            he = he->next;
        }
        /* Chains too long for the vector share its last cell */
        clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
        if (chainlen > maxchainlen) maxchainlen = chainlen;
        totchainlen += chainlen;
    }
    /* All the counters are unsigned long, hence %lu */
    printf("Hash table stats:\n");
    printf(" table size: %lu\n", ht->size);
    printf(" number of elements: %lu\n", ht->used);
    printf(" different slots: %lu\n", slots);
    printf(" max chain length: %lu\n", maxchainlen);
    printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
    /* In theory the same value as the previous line */
    printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
    printf(" Chain length distribution:\n");
    /* Visit every cell, including the last one, which is the ">= " bucket */
    for (i = 0; i < DICT_STATS_VECTLEN; i++) {
        if (clvector[i] == 0) continue;
        printf("   %s%lu: %lu (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
    }
}

/* ----------------------- StringCopy Hash Table Type ------------------------*/

/**
 * Hash function of the string-copy hash table type: hashes the
 * NUL-terminated string 'key' (its strlen() bytes) with
 * dictGenHashFunction().
 */
static unsigned int _dictStringCopyHTHashFunction(const void *key)
{
    return dictGenHashFunction(key, strlen(key));
}
/**
 * Key duplication function: returns a newly allocated, NUL-terminated copy
 * of the string 'key', or NULL when the copy cannot be allocated.
 * 'privdata' is not used.
 */
static void *_dictStringCopyHTKeyDup(void *privdata, const void *key)
{
    /* size_t, as returned by strlen(): an 'int' would truncate the length
     * of very long strings */
    size_t len = strlen(key);
    char *copy = _dictAlloc(len+1);
    /**
     * #define DICT_NOTUSED(V) ((void) V);
     */
    DICT_NOTUSED(privdata);

    /* _dictAlloc() returns NULL on failure: nothing to copy into */
    if (copy == NULL)
        return NULL;
    memcpy(copy, key, len);
    /* Terminate the copy */
    copy[len] = '\0';
    return copy;
}
 
/**
 * Value duplication function: returns a newly allocated, NUL-terminated
 * copy of the string 'val', or NULL when the copy cannot be allocated.
 * 'privdata' is not used.
 */
static void *_dictStringKeyValCopyHTValDup(void *privdata, const void *val)
{
    /* size_t, as returned by strlen(): an 'int' would truncate the length
     * of very long strings */
    size_t len = strlen(val);
    char *copy = _dictAlloc(len+1);
    DICT_NOTUSED(privdata);

    /* _dictAlloc() returns NULL on failure: nothing to copy into */
    if (copy == NULL)
        return NULL;
    memcpy(copy, val, len);
    /* Terminate the copy */
    copy[len] = '\0';
    return copy;
}

/**
 * Key comparison function: returns non-zero when the strings 'key1' and
 * 'key2' are equal according to strcmp(), zero otherwise.
 * 'privdata' is not used.
 */
static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
        const void *key2)
{
    DICT_NOTUSED(privdata);

    return strcmp(key1, key2) == 0;
}

/**
 * Key destructor: releases the memory occupied by 'key'.
 * 'privdata' is not used.
 */
static void _dictStringCopyHTKeyDestructor(void *privdata, void *key)
{
    DICT_NOTUSED(privdata);
    /* NOTE(review): 'key' is already a non-const void*, so this cast does
     * not remove any qualifier */
    _dictFree((void*)key); /* ATTENTION: const cast */
}

/**
 * Value destructor: releases the memory occupied by 'val'.
 * 'privdata' is not used.
 */
static void _dictStringKeyValCopyHTValDestructor(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);
    /* NOTE(review): 'val' is already a non-const void*, so this cast does
     * not remove any qualifier */
    _dictFree((void*)val); /* ATTENTION: const cast */
}

寇浩哲

发布了257 篇原创文章 · 获赞 223 · 访问量 32万+

他的留言板关注

redis源码剖析（七）—— Redis 数据结构dict.c

文章目录

dict.h

dict.c

猜你喜欢