1、哈希表

//dict.h

typedef struct dictEntry {
    void *key;   //键
    union {
        void *val;  可以指向不同类型  redisobject
        uint64_t u64;  //用于redis集群 哨兵模式 选举算法
        int64_t s64;  //记录过期时间
        double d;
    } v;  //值
    struct dictEntry *next;  //指向下个哈希表节点，形成链表（解决哈希冲突使用，将多个哈希值相同的键链接在一起）
} dictEntry;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;    //table是一个数组，数组中每个元素都是指向dictEntry结构的指针，每个dictEntry结构体保存着一个键值对
    unsigned long size;   //size是哈希表的大小，也就是table数组的大小，默认值为4.
    unsigned long sizemask;  //sizemask总是等于size-1，这个属性和哈希值一起决定一个键应该被放在table数组的哪个索引上面；
    unsigned long used;   //hash表里已有的数量
} dictht;

我们先来看一个空的hash表：

然后再来展示下如何通过next指针，将两个索引值相同的键k1和k0连接在一起：

上面的结构就是我们常说的数组+链表(链表采用头插法)

2、字典

typedef struct dictType {
    uint64_t (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
    int (*expandAllowed)(size_t moreMem, double usedRatio);
} dictType;

typedef struct dict {
    dictType *type;  //该字典对应的特定操作函数
    void *privdata;  //该字典依赖的数据，上下文，具体是指操作上下文的对象
    dictht ht[2];    //哈希表，二维的，默认使用ht[0]，当需要rehash的时候，会利用ht[1]进行
    long rehashidx; /* rehashing not in progress if rehashidx == -1 ，rehash的索引，记录了rehash目前的进度，当没有进行rehash时其值为-1*/
    int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) ,hash表的迭代器，一般用于rehash和主从复制等等 */ 
} dict;

type属性和privdata属性是针对不同类型的键值对，为创建多态字典而设置的：

type属性是一个指向dictType结构的指针，每个dictType结构保存了一簇用于操作特定类型键值对的函数，Redis会为用途不同的字典设置不同的类型特定函数。
Privdata属性则保存了需要传给那些类型特定函数的可选参数。

ht属性是一个包含两个项的数组，数组中的每个项都是一个dictht哈希表，一般情况下，字典只使用ht[0]哈希表，ht[1]哈希表只会在对ht[0]哈希表进行rehash的时候使用。

我们来展示一个普通的字典：

/* Low level add or find:
 * This function adds the entry but instead of setting a value returns the
 * dictEntry structure to the user, that will make sure to fill the value
 * field as they wish.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey,NULL);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned, and "*existing" is populated
 * with the existing entry if existing is not NULL.
 *
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;

    if (dictIsRehashing(d)) _dictRehashStep(d);  //如果字典d的rehash还没有操作完，那就在add的时候进行一部分rehash

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];  //如果字典d的rehash还没有操作完，那就将新数据插入到d->ht[1]中去。
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
}

初始时，字典的hash表的大小只有4（sizemask为3），那么通过hash函数计算出的hash值可能很大，此时hash值会进行位运算（& sizemask），（n%size 等于 n & (size-1)），得到存储在hash表里的位置index。

3、redis的数据库的结构

在redis的内部，有一个redisServer结构体的全局变量server，server保存了redis服务端的所有信息，包括当前进程的PID、服务器的端口号、数据库个数、统计信息等等。当然，它也包含了数据库信息，包括数据库的个数、以及一个redisDb数组。

struct redisServer {
    ...
    redisDb *db;
    int dbnum;                      /* Total number of configured DBs */
};

显然，dbnum就是redis数组的长度，每一个数据库，都对应于一个redisDb，在redis的客户端中，可以通过select N来选择使用哪一个数据库，各个数据库之间互相独立。例如：可以在不同的数据库中同时存在名为“baichao”的key。

/* Redis database representation. There are multiple databases identified
 * by integers from 0 (the default database) up to the max configured
 * database. The database number is the 'id' field in the structure. */
typedef struct redisDb {
    dict *dict;                 /* The keyspace for this DB */
    dict *expires;              /* Timeout of keys with a timeout set */
    dict *blocking_keys;        /* Keys with clients waiting for data (BLPOP)*/
    dict *ready_keys;           /* Blocked keys that received a PUSH */
    dict *watched_keys;         /* WATCHED keys for MULTI/EXEC CAS */
    int id;                     /* Database ID */
    long long avg_ttl;          /* Average TTL, just for stats */
    unsigned long expires_cursor; /* Cursor of the active expire cycle. */
    list *defrag_later;         /* List of key names to attempt to defrag one by one, gradually. */
} redisDb;

server是一个全局变量，它包含了若干个redisDb，每一个redisDb是一个keyspace，各个keyspace互相独立，互不干扰。

redis的每一个数据库是一个独立的keyspace，因为我们可能会认为，redis的数据库是一个hash表。但是，从redisDb的定义来看，它并不是一个hash表，而是一个包含很多hash表的结构。之所以这么做，是因为redis还需要提供除了set、get以外更加丰富的功能（例如：键的超时机制）

Redis存储结构源码分析

1、哈希表

2、字典

3、redis的数据库的结构

猜你喜欢