redis之dict

类型介绍

dict部分是redis的内存核心，其实质就是一个哈希表结构，这点和memcached有着异曲同工之妙。只不过memcached在查找和分配内存上做了分离，将slabs和assoc建立了联系；而redis更加简洁，直接就包含了两者，这是因为redis没有固定的内存格式大小，不需要提前去预分配那些内存空间。另外，redis不使用memcached的内存分配方式的另一个原因应该是支持类型的问题：后面可以看到，redis支持的数据类型要比memcached丰富得多。之所以两者都采用哈希表结构作为存储核心，是由缓存的特性和要求决定的：缓存就是用来提高访问速度的，而哈希表能够提供接近于O(1)时间复杂度的增删改查操作；
上面提到了数据操作，dict部分作为底层数据结构，同样提供了增加、删除、修改等操作以及迭代器部分。同时和memcached一样，dict部分同样存在两张哈希表，这两张表是用来调整哈希表大小的：当现有哈希表的大小不满足需求时，就要动态调整大小。注意，这里的调整是增量式的，也就是说redis的rehash过程不是一次性完成的，而是分多步逐渐完成，这点和memcached不一样。这是因为memcached的架构是多线程的，可以通过加锁的方式不影响数据访问；而redis是单线程的，如果不是增量式地调整大小，会导致程序阻塞。

代码分析

自定义类型

/* One key/value node of a hash table. Nodes that fall into the same
 * bucket are chained through 'next'. */
typedef struct dictEntry {
    void *key;   /* key of this entry */
    union {
        void *val;   /* pointer to a caller-defined value */
        uint64_t u64;   /* value stored inline as an unsigned 64 bit integer */
        int64_t s64;    /* value stored inline as a signed 64 bit integer */
        double d;       /* value stored inline as a double */
    } v;
    struct dictEntry *next;  /* next entry chained in the same bucket */
} dictEntry;

/* Set of callbacks attached to a dictionary through its 'type' field.
 * NOTE(review): the macros that invoke these callbacks (dictHashKey,
 * dictSetKey, dictSetVal, dictCompareKeys, dictFreeKey, dictFreeVal) are
 * not shown in this section, so the per-member notes below are inferred
 * from the member names and signatures -- confirm against the macros. */
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);  /* presumably: hash of 'key' */
    void *(*keyDup)(void *privdata, const void *key);  /* presumably: duplicate a key on insert */
    void *(*valDup)(void *privdata, const void *obj);  /* presumably: duplicate a value on insert */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);  /* presumably: non-zero when keys match */
    void (*keyDestructor)(void *privdata, void *key);  /* presumably: release a key */
    void (*valDestructor)(void *privdata, void *obj);  /* presumably: release a value */
} dictType;

/* A single hash table: an array of buckets plus its bookkeeping. */
typedef struct dictht {
    dictEntry **table;       /* bucket array; each slot heads a chain of entries */
    unsigned long size;      /* number of buckets in 'table' */
    unsigned long sizemask;  /* size-1; ANDed with a hash to get the bucket index */
    unsigned long used;      /* number of entries currently stored */
} dictht;   /* hash table used by the dictionary */

/* The dictionary: callbacks, caller private data and two hash tables so
 * that resizing can be carried out incrementally. */
typedef struct dict {
    dictType *type;   /* callbacks used for keys and values */
    void *privdata;   /* opaque pointer supplied at creation time */
    dictht ht[2];  /* two tables, similar in role to memcached's primary_hashtable and old_hashtable */
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;  /* dictionary */

/* Cursor over all entries of a dictionary, advanced with dictNext(). */
typedef struct dictIterator {
    dict *d;      /* dictionary being iterated */
    long index;   /* current bucket index; -1 before the first dictNext() call */
    int table, safe;  /* table: which of d->ht[] is being walked; safe: non-zero
                       * makes dictNext() count this iterator in d->iterators
                       * instead of recording a fingerprint */
    dictEntry *entry, *nextEntry;  /* entry last returned, and its saved successor */
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;

定义变量

dict_can_resize，是否能够调整大小；
dict_force_resize_ratio，强制调整大小的比率；

代码分解

_dictReset，重置字典结构内部变量；

/* Put a hash table back into its empty state: no bucket array and every
 * counter at zero. The previous bucket array is NOT freed here; callers
 * that own one must release it before calling this. */
static void _dictReset(dictht *ht)
{
    ht->used = 0;
    ht->sizemask = 0;
    ht->size = 0;
    ht->table = NULL;
}

_dictInit，初始化字典；

/* Initialize an already allocated dictionary.
 *
 * d           - dictionary to set up
 * type        - callback table stored in d->type
 * privDataPtr - opaque pointer stored in d->privdata
 *
 * Both hash tables start empty, no rehash is in progress (rehashidx is
 * -1) and no iterator is registered. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    int which;

    for (which = 0; which < 2; which++)
        _dictReset(&d->ht[which]);   /* both tables start out empty */
    d->iterators = 0;
    d->rehashidx = -1;
    d->privdata = privDataPtr;
    d->type = type;
    return DICT_OK;
}

dictCreate，创建一个字典结构；

/* Allocate a new dictionary with zmalloc() and initialize it through
 * _dictInit() using the given callback table and private data pointer.
 * Returns the new dictionary. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    _dictInit(created, type, privDataPtr);
    return created;
}

dictResize，调整字典大小；

/* Resize the table to the smallest size able to hold all current entries.
 *
 * The request passed to dictExpand() is the number of entries stored in
 * ht[0], raised to DICT_HT_INITIAL_SIZE when it is lower than that.
 *
 * Returns DICT_ERR when resizing is disabled (dict_can_resize is zero) or
 * a rehash is already in progress; otherwise returns whatever
 * dictExpand() returns. */
int dictResize(dict *d)
{
    /* Same type as dictht.used and as dictExpand()'s parameter, so a large
     * entry count is not truncated on its way through. */
    unsigned long minimal;

    /* Resizing needs to be allowed and no rehash may be running. */
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used;   /* entries currently stored */
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;  /* never go below the initial size */
    return dictExpand(d, minimal);
}

dictExpand，扩展字典；

/* Expand or create the hash table.
 *
 * d    - dictionary to grow
 * size - number of entries the new table must be able to hold; the bucket
 *        count actually used is _dictNextPower(size)
 *
 * Returns DICT_ERR when a rehash is already running, when 'size' is
 * smaller than the number of entries in ht[0], when the resulting bucket
 * count equals the current one, or when the bucket array size in bytes
 * does not fit the arithmetic type used to compute it. Returns DICT_OK
 * otherwise.
 *
 * If ht[0] has no bucket array yet the new table simply becomes ht[0].
 * Otherwise it becomes ht[1] and rehashidx is set to 0, which starts an
 * incremental rehash. */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    /* Bucket count for the new table (per the original note, the next
     * power of two that fits 'size'). */
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Refuse bucket counts whose size in bytes wraps around: dividing the
     * product back only yields realsize when the multiplication did not
     * overflow. Without this the allocation below could be far smaller
     * than n.size claims. */
    if (realsize * sizeof(dictEntry*) / sizeof(dictEntry*) != realsize)
        return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;  /* ANDed with a hash to select a bucket */
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;  /* not the first initialization: entries migrate from ht[0] to ht[1] */
    d->rehashidx = 0;
    return DICT_OK;
}

dictRehash，rehash过程，每次调用最多迁移n个非空桶（bucket），并且最多访问n*10个空桶；

/* Perform up to 'n' steps of incremental rehashing.
 *
 * Each step moves every entry of one non-empty ht[0] bucket into ht[1].
 * Empty buckets are skipped, but at most n*10 of them are visited per
 * call; when that budget is used up the function returns early.
 *
 * Returns 1 if there are still entries left to move from ht[0], and 0 if
 * no rehash is in progress or the rehash has just completed. On
 * completion the old bucket array is freed, ht[1] becomes ht[0], ht[1] is
 * reset and rehashidx goes back to -1. */
int dictRehash(dict *d, int n) {
    int empty_budget = n*10; /* Max number of empty buckets to visit. */
    dictht *from, *to;

    /* Nothing to do when no rehash is in progress. */
    if (!dictIsRehashing(d)) return 0;

    from = &d->ht[0];
    to = &d->ht[1];

    /* Stop once 'n' buckets were migrated or the old table is empty. */
    for (; n != 0 && from->used != 0; n--) {
        dictEntry *node, *following;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(from->size > (unsigned long)d->rehashidx);

        /* Advance to the next non-empty bucket, giving up (with the rehash
         * still unfinished) once the empty-bucket budget is spent. */
        while (from->table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_budget == 0) return 1;
        }

        /* Move all the keys in this bucket from the old to the new hash HT.
         * The chain cannot be relinked as a whole: every entry gets its
         * bucket index recomputed against the new table's mask. */
        for (node = from->table[d->rehashidx]; node != NULL; node = following) {
            unsigned int slot;

            following = node->next;
            /* Get the index in the new hash table */
            slot = dictHashKey(d, node->key) & to->sizemask;
            node->next = to->table[slot];   /* push on the head of the chain */
            to->table[slot] = node;
            from->used--;
            to->used++;
        }
        from->table[d->rehashidx] = NULL;  /* the old bucket is now empty */
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (from->used == 0) {
        zfree(from->table);
        *from = *to;          /* the new table takes the place of ht[0] */
        _dictReset(to);
        d->rehashidx = -1;    /* rehash finished */
        return 0;
    }

    /* More to rehash... */
    return 1;
}

dictRehashMilliseconds, rehash过程，设定单次rehash时间；

/* Rehash in batches of 100 steps until dictRehash() reports that nothing
 * is left to do, or until more than 'ms' milliseconds have elapsed since
 * the call started (checked after each batch).
 *
 * Returns 100 times the number of batches after which dictRehash() still
 * reported remaining work; the batch that ends the rehash is not counted. */
int dictRehashMilliseconds(dict *d, int ms) {
    long long began = timeInMilliseconds();
    int counted = 0;

    for (;;) {
        if (!dictRehash(d,100)) break;
        counted += 100;
        if (timeInMilliseconds()-began > ms) break;
    }
    return counted;
}

dictAdd，添加元素；

/* Add a key/value pair to the dictionary.
 *
 * Returns DICT_OK when a new entry was created and its value set, or
 * DICT_ERR when dictAddRaw() did not hand back an entry. */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *created = dictAddRaw(d,key);  /* new entry for this key, if any */

    if (created == NULL) return DICT_ERR;
    dictSetVal(d, created, val);
    return DICT_OK;
}

dictAddRaw

/* Low level add: create the entry for 'key' and return it without setting
 * any value, so that the caller can fill the value field as it likes.
 *
 * Returns NULL when _dictKeyIndex() reports -1 (the key already exists,
 * per the original note); otherwise returns the newly linked entry. */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int slot;
    dictEntry *created;
    dictht *target;

    /* While a rehash is running, every add also performs one rehash step. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    slot = _dictKeyIndex(d, key);
    if (slot == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. During a rehash new entries go into ht[1]. */
    if (dictIsRehashing(d))
        target = &d->ht[1];
    else
        target = &d->ht[0];
    created = zmalloc(sizeof(*created));
    created->next = target->table[slot];
    target->table[slot] = created;
    target->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, created, key);
    return created;
}

dictReplace，替换

/* Add the pair, or overwrite the value when the key is already present.
 *
 * Returns 1 if the key was added from scratch, 0 if an existing entry got
 * its value replaced. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *existing, previous;

    /* Try a plain add first: it succeeds when the key is not present. */
    if (dictAdd(d, key, val) == DICT_OK)
        return 1;

    /* The add was refused, so look up the entry already holding the key. */
    existing = dictFind(d, key);

    /* Set the new value and free the old one. Note that it is important
     * to do that in this order, as the value may just be exactly the same
     * as the previous one. In this context, think to reference counting,
     * you want to increment (set), and then decrement (free), and not the
     * reverse. */
    previous = *existing;            /* copy that still carries the old value */
    dictSetVal(d, existing, val);    /* install the new value */
    dictFreeVal(d, &previous);       /* then release the old one */
    return 0;
}

dictReplaceRaw，这个函数先用dictFind查找给定的key：如果已存在，就直接返回已有的entry；如果不存在，则调用dictAddRaw新增一个entry并返回；

/* Return the entry for 'key': the existing one when dictFind() locates
 * it, otherwise whatever dictAddRaw() returns for that key. */
dictEntry *dictReplaceRaw(dict *d, void *key) {
    dictEntry *found = dictFind(d,key);

    if (found != NULL) return found;   /* already present: hand it back */
    return dictAddRaw(d,key);          /* absent: create it */
}

dictGenericDelete，通用删除方式，先查找，后删除

/* Search for 'key' and remove its entry from the dictionary.
 *
 * nofree - when zero, dictFreeKey() and dictFreeVal() are invoked on the
 *          entry before the entry itself is released; when non-zero only
 *          the entry structure is released.
 *
 * ht[0] is searched first and ht[1] only while a rehash is in progress.
 * One rehash step is performed first if a rehash is running.
 * Returns DICT_OK when an entry was removed, DICT_ERR when the table is
 * empty or the key was not found. */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
    unsigned int hash, slot;
    dictEntry *cur, *prev;
    int which;

    if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    hash = dictHashKey(d, key);

    for (which = 0; which <= 1; which++) {
        slot = hash & d->ht[which].sizemask;
        prev = NULL;
        for (cur = d->ht[which].table[slot]; cur != NULL;
             prev = cur, cur = cur->next)
        {
            if (key==cur->key || dictCompareKeys(d, key, cur->key)) {
                /* Unlink the element from the list */
                if (prev != NULL)
                    prev->next = cur->next;
                else
                    d->ht[which].table[slot] = cur->next;
                if (!nofree) {
                    dictFreeKey(d, cur);
                    dictFreeVal(d, cur);
                }
                zfree(cur);
                d->ht[which].used--;
                return DICT_OK;
            }
        }
        /* Without a rehash in progress ht[1] holds nothing to look at. */
        if (!dictIsRehashing(d)) break;
    }
    return DICT_ERR; /* not found */
}

dictDelete，删除某个key对应的entry；

/* Remove the entry for 'key', releasing its key and value as well.
 * Returns the result of dictGenericDelete(). */
int dictDelete(dict *ht, const void *key) {
    const int nofree = 0;   /* key and value are released too */

    return dictGenericDelete(ht, key, nofree);
}

dictDeleteNoFree，删除某个key对应的entry，不释放空间；

/* Remove the entry for 'key' but leave its key and value untouched; only
 * the entry structure is released. Returns the result of
 * dictGenericDelete(). */
int dictDeleteNoFree(dict *ht, const void *key) {
    const int nofree = 1;   /* key and value are left alone */

    return dictGenericDelete(ht, key, nofree);
}

_dictClear，释放整个哈希表；

/* Destroy every entry of one hash table, free its bucket array and reset
 * the table.
 *
 * callback - optional; when non-NULL it is called with d->privdata each
 *            time the bucket index is a multiple of 65536 (index 0
 *            included).
 *
 * Always returns DICT_OK. */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
    unsigned long slot;

    /* Free all the elements; stop early once no entry is left. */
    for (slot = 0; slot < ht->size && ht->used > 0; slot++) {
        dictEntry *cur, *following;

        if (callback && (slot & 65535) == 0) callback(d->privdata);

        for (cur = ht->table[slot]; cur != NULL; cur = following) {
            following = cur->next;   /* saved before 'cur' is released */
            dictFreeKey(d, cur);
            dictFreeVal(d, cur);
            zfree(cur);
            ht->used--;
        }
    }
    /* Free the table and the allocated cache structure */
    zfree(ht->table);
    /* Re-initialize the table */
    _dictReset(ht);
    return DICT_OK; /* never fails */
}

dictRelease，释放字典；

/* Clear both hash tables (without a progress callback) and then free the
 * dictionary structure itself. */
void dictRelease(dict *d)
{
    int which;

    for (which = 0; which < 2; which++)
        _dictClear(d, &d->ht[which], NULL);
    zfree(d);
}

dictFind，查找某个key对应的entry;

/* Look up the entry holding 'key'.
 *
 * Performs one rehash step first when a rehash is in progress. ht[0] is
 * searched first; ht[1] is searched only while rehashing.
 * Returns the matching entry, or NULL when the dictionary holds no
 * entries or the key is not present. */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *cur;
    unsigned int hash, slot, which;

    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    hash = dictHashKey(d, key);
    for (which = 0; which <= 1; which++) {
        slot = hash & d->ht[which].sizemask;
        for (cur = d->ht[which].table[slot]; cur != NULL; cur = cur->next) {
            if (key==cur->key || dictCompareKeys(d, key, cur->key))
                return cur;
        }
        /* Not rehashing: the second table has nothing to offer. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL;
}

dictFetchValue，获取该key对应的value；

/* Return the value stored under 'key' (via dictGetVal), or NULL when
 * dictFind() does not locate the key. */
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *found = dictFind(d,key);

    if (found == NULL) return NULL;
    return dictGetVal(found);
}

dictGetIterator，获取一个dict字典的迭代器；

/* Allocate an iterator over 'd' in its not-yet-started state: table 0,
 * index -1, no current or next entry, and the 'safe' flag cleared.
 * The fingerprint field is left unset here; dictNext() fills it on the
 * first call for an iterator whose 'safe' flag is zero. */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *it = zmalloc(sizeof(*it));

    it->nextEntry = NULL;
    it->entry = NULL;
    it->safe = 0;
    it->index = -1;
    it->table = 0;
    it->d = d;
    return it;
}

dictGetSafeIterator，获取一个安全迭代器；

/* Same as dictGetIterator(), but the returned iterator has its 'safe'
 * flag set to 1. */
dictIterator *dictGetSafeIterator(dict *d) {
    dictIterator *it = dictGetIterator(d);

    it->safe = 1;
    return it;
}

dictNext，获取下一个entry，这个函数有点意思，意思在于这个迭代器和之前链表的迭代器有所不同，这里不仅需要判断是在哪个bucket里面，也需要判断是在哪个table中，最后还要考虑下是在哪个链表中的哪个entry；

/* Advance the iterator and return the next entry, or NULL once every
 * bucket has been visited.
 *
 * The walk goes bucket by bucket through ht[0] and, if a rehash is in
 * progress when the end of ht[0] is reached, continues through ht[1].
 * Within a bucket the chain is followed through the successor saved in
 * iter->nextEntry. On the very first call a safe iterator increments
 * d->iterators, while an unsafe one records the dictionary fingerprint. */
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {  /* no current entry: either the iteration has not started yet, or the chain of the current bucket is exhausted */
            dictht *ht = &iter->d->ht[iter->table];
            if (iter->index == -1 && iter->table == 0) {
                /* index still at -1 on table 0: this is the first call */
                if (iter->safe)
                    iter->d->iterators++;  /* register this running iterator */
                else
                    iter->fingerprint = dictFingerprint(iter->d);
            }
            iter->index++;  /* move on to the next bucket */
            if (iter->index >= (long) ht->size) {
                /* ran past the last bucket of the current table */
                if (dictIsRehashing(iter->d) && iter->table == 0) {  /* rehash in progress and still on table 0 */
                    iter->table++;  /* switch to table 1 */
                    iter->index = 0;  /* start again from its first bucket */
                    ht = &iter->d->ht[1];  /* table now being walked */
                } else {
                    break;  /* nothing left to visit: iteration is over, return NULL */
                }
            }
            iter->entry = ht->table[iter->index];  /* head of the bucket's chain (may be NULL) */
        } else {
            iter->entry = iter->nextEntry;  /* continue along the current chain */
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}

dictReleaseIterator，释放迭代器；

/* Release an iterator.
 *
 * If the iterator was advanced at least once (it is no longer at index -1
 * on table 0), a safe iterator decrements d->iterators, while an unsafe
 * one asserts that the dictionary fingerprint still equals the one
 * recorded when iteration started. An iterator that was never advanced is
 * simply freed. */
void dictReleaseIterator(dictIterator *iter)
{
    int started = !(iter->index == -1 && iter->table == 0);

    if (started) {
        if (iter->safe)
            iter->d->iterators--;
        else
            assert(iter->fingerprint == dictFingerprint(iter->d));
    }
    zfree(iter);
}

类型介绍

代码分析

自定义类型

定义变量

代码分解

猜你喜欢