Redis cache deletion mechanism (source code analysis)

Deleted range

  1. Expired keys
  2. When memory is full and a write command such as SET is executed while none of the keys have expired, a key chosen by the cache eviction policy is deleted

Expired delete

Keys that have an expiration time set are stored separately in Redis:

/* Per-database state (excerpt; the remaining fields are elided). */
typedef struct redisDb {
    dict *dict;          // All key-value pairs
    dict *expires;       // Keys that have an expiration time set
    // ...
} redisDb;
复制代码

Set validity period

Redis has four commands for setting a key's expiration time: expire, pexpire, expireat and pexpireat.

Set relative time

  • expire <key> <ttl>: sets the key's time to live to ttl seconds.
// src/expire.c

/* EXPIRE key seconds
 * Relative expiry: passes the current time in ms as the base time and
 * UNIT_SECONDS as the unit of the argument. */
void expireCommand(client *c) {
    expireGenericCommand(c,mstime(),UNIT_SECONDS);
}
复制代码
  • pexpire <key> <ttl>: sets the key's time to live to ttl milliseconds.
// src/expire.c

/* PEXPIRE key milliseconds
 * Relative expiry: passes the current time in ms as the base time and
 * UNIT_MILLISECONDS as the unit of the argument. */
void pexpireCommand(client *c) {
    expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
}
复制代码

Set absolute time

  • expireat <key> <timestamp>: sets the key's expiration time to the given Unix timestamp, in seconds.
// src/expire.c

/* EXPIREAT key time
 * Absolute expiry: passes 0 as the base time, so the argument (in seconds)
 * is used as the timestamp itself. */
void expireatCommand(client *c) {
    expireGenericCommand(c,0,UNIT_SECONDS);
}
复制代码
  • pexpireat <key> <timestamp>: sets the key's expiration time to the given Unix timestamp, in milliseconds.
// src/expire.c

/* PEXPIREAT key ms_time
 * Absolute expiry: passes 0 as the base time, so the argument (in
 * milliseconds) is used as the timestamp itself. */
void pexpireatCommand(client *c) {
    expireGenericCommand(c,0,UNIT_MILLISECONDS);
}
复制代码

All four commands end up calling the common function expireGenericCommand:

// src/expire.c

/* Shared implementation of EXPIRE, PEXPIRE, EXPIREAT and PEXPIREAT.
 * basetime is added to the parsed argument; unit (UNIT_SECONDS or
 * UNIT_MILLISECONDS) says whether the argument must be scaled to ms. */
void expireGenericCommand(client *c, long long basetime, int unit) {
    robj *key = c->argv[1], *param = c->argv[2];
    
    // Parse the time argument into `when`; return if the helper reports failure
    long long when;
    if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK)
        return;

    // Convert the time to milliseconds, then add the base time
    if (unit == UNIT_SECONDS) when *= 1000;
    when += basetime;
    // On a master that is not loading data, an expiration time that is not in
    // the future turns the command into a deletion of the key
    if (when <= mstime() && !server.loading && !server.masterhost) {
        robj *aux;

        int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) :
                                                    dbSyncDelete(c->db,key);
        // ...
        // Propagate the delete command to the slaves and the AOF
        // ...
    } else {
        // Record the expiration time
        setExpire(c,c->db,key,when);
        // ...
        // Build the reply and publish the key-modified notifications
        // ...
        return;
    }
}
复制代码

The expiration time is actually set by setExpire, which stores it in the s64 member of the union v of a dictEntry:

// src/db.c

/* Record `when` as the expiration time of `key` in db->expires. */
void setExpire(client *c, redisDb *db, robj *key, long long when) {
    dictEntry *kde, *de;

    // Look up the entry in db->dict. This is the same lookup a GET performs:
    // hashFunc(key) & sizemask selects the bucket, whose chain is then walked
    kde = dictFind(db->dict,key->ptr);
    // Find the matching entry in db->expires, creating one if it does not
    // exist; the key of the db->dict entry is reused as its key
    de = dictAddOrFind(db->expires,dictGetKey(kde));
    // Store the expiration time in the entry's v.s64
    dictSetSignedIntegerVal(de,when);
	// ...
}

/* Store a signed 64-bit value in the s64 member of the entry's union v. */
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = _val_; } while(0)
复制代码

Each dictEntry in db->expires stores an expiring key and its expiration time; the expiration time is held in the union v. As you can see, v has a different meaning in different usage scenarios within Redis.

/* One hash-table entry. The meaning of v depends on which dict the entry
 * belongs to. */
typedef struct dictEntry {
    void *key;
    union {
        void *val;      // pointer value (read via dictGetVal)
        uint64_t u64;
        int64_t s64;    // signed integer value; db->expires keeps the expiration time here
        double d;
    } v;
    struct dictEntry *next;   // next entry in the same bucket's chain
} dictEntry;
复制代码

Query expiration time

  • ttl key: returns the remaining time to live of the key, in seconds.
// src/expire.c

/* TTL key
 * Calls the shared implementation with output_ms = 0 (reply in seconds). */
void ttlCommand(client *c) {
    ttlGenericCommand(c, 0);
}
复制代码
  • pttl key: returns the remaining time to live of the key, in milliseconds.
// src/expire.c

/* PTTL key
 * Calls the shared implementation with output_ms = 1 (reply in milliseconds). */
void pttlCommand(client *c) {
    ttlGenericCommand(c, 1);
}
复制代码

Both commands end up calling the common function ttlGenericCommand:

// src/expire.c

/* Implements TTL and PTTL.
 * output_ms selects the unit of the reply: non-zero for milliseconds,
 * zero for seconds. */
void ttlGenericCommand(client *c, int output_ms) {
    // ...
    // (elided) handling of a key that does not exist
    // ...
    
    // Fetch the expiration time. getExpire returns -1 when none is set, in
    // which case ttl keeps the value it was given in the elided code
    // (presumably -1, given the check below)
    expire = getExpire(c->db,c->argv[1]);
    if (expire != -1) {
        ttl = expire-mstime();
        if (ttl < 0) ttl = 0;
    }
    
    if (ttl == -1) {
        addReplyLongLong(c,-1);
    } else {
        // Reply in the requested unit; for seconds, (ttl+500)/1000 rounds to
        // the nearest second
        addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
    }
}
复制代码

The expiration time is fetched by getExpire, which looks the key up in db->expires and returns the s64 member of the entry's union v:

// src/db.c

// Return the absolute expiration time stored for the key, or -1 if the key
// has no expiration time
long long getExpire(redisDb *db, robj *key) {
    dictEntry *de;

    // Look the key up in db->expires
    if (dictSize(db->expires) == 0 ||
        // A NULL result means no expiration time is set: return -1 to the caller
       (de = dictFind(db->expires,key->ptr)) == NULL) return -1;
	
    // Read v.s64
    return dictGetSignedIntegerVal(de);
}

/* Read the s64 member of the entry's union v. */
#define dictGetSignedIntegerVal(he) ((he)->v.s64)
复制代码

Expiration strategy

Redis combines lazy deletion with periodic scanning.

Lazy deletion

On every access to a key, expireIfNeeded is called to determine whether the key has expired. If it has, the key is deleted; otherwise the value corresponding to the key is returned. Using this strategy alone may waste a lot of memory, because expired keys that are never accessed again are never deleted.

// src/db.c

/* Lazy expiration check for `key`.
 * Returns 0 if the key has no expiration time, has not expired yet, or the
 * server is loading data. On a slave it only reports whether the key is
 * logically expired. On a master an expired key is deleted and the result of
 * the delete call is returned. */
int expireIfNeeded(redisDb *db, robj *key) {
    mstime_t when = getExpire(db,key);
    mstime_t now;
	
    // No expiration time set: return immediately
    if (when < 0) return 0;

    // Do not expire keys while data is being loaded from disk
    if (server.loading) return 0;

    // See GitHub issue #1525.
    // On a master, a Lua script may use the existence of a key as a condition.
    // To keep such conditions consistent within one script, the current time
    // is pinned to the script's start time
    now = server.lua_caller ? server.lua_time_start : mstime();

    // On a slave, report whether the key is expired right now but do not
    // perform the deletion that follows
    if (server.masterhost != NULL) return now > when;

    // The key has not expired
    if (now <= when) return 0;

    // Count the expired key
    server.stat_expiredkeys++;
    // Write a DEL command to all slaves and the AOF file
    propagateExpire(db,key,server.lazyfree_lazy_expire);
    // Publish a key-expired message on the keyspace channel
    notifyKeyspaceEvent(NOTIFY_EXPIRED,
        "expired",key,db->id);
    // Delete synchronously or asynchronously depending on configuration (the
    // asynchronous path only removes the reference; a background thread does
    // the physical deletion)
    return server.lazyfree_lazy_expire ? dbAsyncDelete(db,key) :
                                         dbSyncDelete(db,key);
}
复制代码

Special treatment

  1. When the Lua script is executed on the master node

Refer to GitHub Issue #1525. On the master, a Lua script may use the existence of a key as a condition while it runs. To avoid inconsistent conditions within one script, the current time is forced to be the script start time. Without this measure, repeated executions of a Lua script such as the following /tmp/myscript.lua could give inconsistent results:

-- /tmp/myscript.lua
-- Checks the existence of KEYS[1] twice and increments "mycounter" after each
-- successful check. If the key expired between the two checks, the counter
-- would be incremented only once.

if redis.call("exists",KEYS[1]) == 1
then
    redis.call("incr","mycounter")
end

if redis.call("exists",KEYS[1]) == 1
then
    return redis.call("incr","mycounter")
end
复制代码

The specific reproduction operation can refer to the following bash script:

# Repro loop: set x with a 100 ms TTL, run the script about 92 ms later (just
# before x expires), then print mycounter from the default instance and from
# the instance on port 6380 (presumably a replica) for comparison.
while [ 1 ]
do
    redis-cli set x foo px 100 > /dev/null
    sleep 0.092
    redis-cli --eval /tmp/myscript.lua x > /dev/null
    sleep 0.1
    redis-cli get mycounter
    redis-cli -p 6380 get mycounter
done
复制代码
  2. On a slave node

On a slave node, keys are deleted by DEL commands sent from the master, so here only the expired status is returned to the client and the delete operation is not executed.

  3. This step is skipped while data is being loaded from RDB or AOF

Scan regularly

The system periodically scans for expired keys and deletes them. Using this strategy alone may leave keys that have expired but have not been deleted yet.

By default, Redis runs an expired-key scan every 100 ms (the frequency is set by the hz parameter; the period is 1s/hz). Because keys with an expiration time are stored separately in redisDb, the scan never has to go through all keys. The actual work is done by the function activeExpireCycle.

Background operations such as activeExpireCycle and incrementallyRehash are all triggered by databasesCron

/* Periodic (active) expiration: sample keys from db->expires in each database
 * and expire the ones that are due (excerpt; setup and bookkeeping elided). */
void activeExpireCycle(int type) {
    // ...
 
    // Visit the databases one after another
    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
        int expired;
        redisDb *db = server.db+(current_db % server.dbnum);

        // Advance to the next db right away, so that if this call exits early
        // on the time limit the next call continues from that db and the
        // cleanup is spread evenly over all dbs
        current_db++;

        do {
            // ...
            // (elided) skip cases where there is nothing to do, e.g. no keys
            // with an expiration time
            // ...

            // Sample size, 20 by default
            if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
                num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;

            // Randomly pick up to 20 of the keys that have an expiration time
            while (num--) {
                dictEntry *de;
                long long ttl;

                // Pick one random key from the dict
                if ((de = dictGetRandomKey(db->expires)) == NULL) break;
                ttl = dictGetSignedIntegerVal(de)-now;
                // Try to expire it; the deletion itself is similar to the
                // lazy-deletion path
                if (activeExpireCycleTryExpire(db,de,now)) expired++;
                // ...
            }
            // ...
            // Update statistics, etc.
            // ...
        // If more than 25% of the sample was expired in this round, expired
        // keys are probably common, so run one more round
        } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
    }
    // ...
}
复制代码

Random sampling is performed by dictGetRandomKey:

// src/dict.c

/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;
    unsigned long h;
    int listlen, listele;

    // No entries: return NULL (the expire cycle stops sampling when it gets NULL)
    if (dictSize(d) == 0) return NULL;
    // rehashidx tells whether a rehash is in progress; if so, perform one
    // rehash step first
    if (dictIsRehashing(d)) _dictRehashStep(d);
    
    if (dictIsRehashing(d)) {
        do {
            // A rehash is in progress, so entries in both tables must be
            // considered.
            //
            // Buckets of ht[0] with an index below rehashidx have already been
            // migrated and are empty, so sample only from ht[0] buckets at
            // index >= rehashidx and from ht[1]
            h = d->rehashidx + (random() % (d->ht[0].size +
                                            d->ht[1].size -
                                            d->rehashidx));
            he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
                                      d->ht[0].table[h];
        // Retry when an empty bucket was picked
        } while(he == NULL);
    } else {
        do {
            // Same rule as when an entry is inserted (hashFunc(key) & sizemask):
            // random() & sizemask picks a random bucket index
            h = random() & d->ht[0].sizemask;
            he = d->ht[0].table[h];
        // Retry when an empty bucket was picked
        } while(he == NULL);
    }
    
    // At this point he is the head of the whole chain stored in one bucket,
    // so a random element of that chain still has to be chosen
	
    // Walk the chain to count its length
    listlen = 0;
    orighe = he;
    while(he) {
        he = he->next;
        listlen++;
    }
    // Pick a random position in the chain
    listele = random() % listlen;
    he = orighe;
    // Walk the chain again up to the chosen position
    while(listele--) he = he->next;
    return he;
}

复制代码

Cache elimination

Configure the maximum memory limit

Configure in redis.conf

The configuration file and command-line arguments are loaded when the Redis server starts, and maxmemory is stored in the maxmemory field of the server object.

When the Redis server starts, main performs initialization and other setup, and calls loadServerConfig to load the configuration file.

// src/server.c
/* Server entry point (excerpt: only the configuration-related steps are shown). */
int main(int argc, char **argv) {
    // ..
    // Load the configuration
    loadServerConfig(configfile,options);
    // ..
    // Warn about a suspiciously small maxmemory setting
    if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
        serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
    }
}
复制代码

loadServerConfig loads the configuration file (or stdin) and the command-line options into the config string, and then calls loadServerConfigFromString:

// src/config.c
/* Build one configuration string from the config file (or stdin) plus the
 * extra options, then hand it to loadServerConfigFromString.
 * filename may be NULL (no file), "-" (read stdin) or a path; options may be
 * NULL. Exits the process if the file cannot be opened. */
void loadServerConfig(char *filename, char *options) {
    sds config = sdsempty();
    char buf[CONFIG_MAX_LINE+1];
	
    // Load the configuration file
    if (filename) {
        FILE *fp;

        // Started as `./redis-server -`: read from stdin (EOF has to be
        // triggered with Ctrl-D)
        if (filename[0] == '-' && filename[1] == '\0') {
            fp = stdin;
        } else {
        // The argument is not "-": try to open the file it names
            if ((fp = fopen(filename,"r")) == NULL) {
                serverLog(LL_WARNING,
                    "Fatal error, can't open config file '%s'", filename);
                exit(1);
            }
        }
        // Append every line of the configuration file to config
        while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL)
            config = sdscat(config,buf);
        if (fp != stdin) fclose(fp);
    }
    // Append the other options, e.g. what follows in
    // `./redis-server --port 8080`, directly to config
    if (options) {
        config = sdscat(config,"\n");
        config = sdscat(config,options);
    }
    loadServerConfigFromString(config);
    sdsfree(config);
}
复制代码

loadServerConfigFromString reads the config string built in the previous step line by line and writes the settings into the server object:

// src/config.c
/* Parse the configuration string line by line and store each recognised
 * directive in the server object (excerpt; most directives are elided). */
void loadServerConfigFromString(char *config) {
	// ...
    
    // Split the configuration into lines
    lines = sdssplitlen(config,strlen(config),"\n",1,&totlines);
    for (i = 0; i < totlines; i++) {
		// Skip invalid lines and comments
        // ...
        argv = sdssplitargs(lines[i],&argc);
        
        // Lower-case the directive name
        sdstolower(argv[0]);

        // Dispatch on the directive name (compared with strcasecmp)
        if (!strcasecmp(argv[0],"timeout") && argc == 2) {
            server.maxidletime = atoi(argv[1]);
            if (server.maxidletime < 0) {
                err = "Invalid timeout value"; goto loaderr;
            }
        // ...
        } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
            // memtoll converts the textual setting into the corresponding
            // long long value, e.g. 1kb -> 1024
            server.maxmemory = memtoll(argv[1],NULL);
        }
    }
}
复制代码

Configuring with the CONFIG SET command

When the Redis server receives a CONFIG SET command from a client, it calls the configSetCommand function. The command and its arguments, as received by the server, are stored in argc and argv:

argc: 4
argv: 0       1    2          3
      config  set  maxmemory  10mb
复制代码

After maxmemory is changed dynamically, Redis immediately tries to trigger memory reclamation; changing other memory-related settings (maxmemory_policy, for example) does not trigger it.

/* Fragment of the CONFIG SET handler: the branch that handles "maxmemory". */
if (0) {
    // ...
} config_set_memory_field("maxmemory",server.maxmemory) {
    // A non-zero value means a memory limit is in effect
    if (server.maxmemory) {
        // Warn when the new limit is already below the current memory usage
        if (server.maxmemory < zmalloc_used_memory()) {
            serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
        }
        // Try to free memory right away
        freeMemoryIfNeeded();
    }
    // ...
}

复制代码

Memory limitations of 32-bit machines

On a 64-bit machine, maxmemory set to 0 means memory is not limited. On a 32-bit machine, however, the address space is at most 4 GB, so when no limit is configured the memory limit is set to 3 GB and the eviction policy is set to noeviction.

// src/server.c
// ...
/* On a 32-bit build with no limit configured, impose a 3 GB limit and the
 * noeviction policy. */
if (server.arch_bits == 32 && server.maxmemory == 0) {
    serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
    server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
    server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
 }
复制代码

Elimination strategy

The eviction policy is configured with CONFIG SET maxmemory-policy. The default is:

  • **noeviction**: once memory is full, commands that need more memory (set, for example) return an error directly

For all keys: 

  • **allkeys-lru**: Use the LRU algorithm to delete within the range of all keys. If the memory is still insufficient, an error will be reported
  • **allkeys-lfu**: Use the LFU algorithm to delete within the range of all keys. If the memory is still insufficient, an error will be reported
  • **allkeys-random**: Randomly delete within the range of all keys. If the memory is still insufficient, an error will be reported

For the key with expiration time set: 

  • **volatile-lru**: Use the LRU algorithm to delete among the keys with an expiration time set. If the memory is still insufficient, an error will be reported
  • **volatile-lfu**: Use the LFU algorithm to delete among the keys with an expiration time set. If the memory is still insufficient, an error will be reported
  • **volatile-random**: Randomly delete among the keys with an expiration time set. If the memory is still insufficient, an error will be reported
  • **volatile-ttl**: Delete the keys that are closest to expiring. If the memory is still insufficient, an error will be reported

Before evicting, Redis computes an idle value for a sample of objects. How the value is computed depends on the eviction policy in use. The larger the idle value, the higher the object's priority for deletion. The following mainly describes how idle is computed under LRU and LFU.

LRU elimination strategy

Deletion is sampling-based. The number of samples is controlled by CONFIG SET maxmemory-samples 100, which corresponds to the maxmemory_samples field of the server object (read as server.maxmemory_samples in the code); the larger the number of samples, the closer the behavior is to the traditional LRU algorithm.

Optimization Strategy

To avoid the overhead of the hashmap + linked list implementation that traditional LRU algorithms usually use, Redis makes the following optimizations:

  1. An lru field in the redisObject structure records the access timestamp of the data, instead of adjusting the position of the object in a linked list on every access
typedef struct redisObject {
    // Object type
    unsigned type:4;
    // Object encoding
    unsigned encoding:4;
    // The LRU and LFU algorithms share this lru field.
    //
    // LRU_BITS is 24 by default, so at most 194 days of timestamp can be
    // stored. The field is written when the object is created and updated
    // when the object is accessed; once the range is exceeded, counting
    // starts again from 0
    unsigned lru:LRU_BITS;
    int refcount;
    void *ptr;
} robj;

复制代码
  2. A sampled array is used instead of a linked list; the candidate set is then filtered by the value of the lru field, which avoids the overhead of a linked list. Objects in the candidate set are represented by evictionPoolEntry:
/* One candidate in the eviction pool (excerpt; other fields elided). */
struct evictionPoolEntry {
    unsigned long long idle; // Score used to order candidates; its meaning depends on the algorithm
    sds key;  // Name of the key
    // ...
};
复制代码

Calculation method

The global lru_clock records the current Unix timestamp. It is refreshed from serverCron (by calling updateCachedTime), by default every 100 ms. The refresh frequency depends on the hz parameter; the interval between refreshes is 1s/hz.

LRU_CLOCK_RESOLUTION is 1000, so when the LRU_CLOCK function is used to get lru_clock, the lruclock cached in the global variable is used as long as it is refreshed at least once per second:

/* Return the current LRU clock value.
 * When the period implied by server.hz (1000/hz ms) is no longer than
 * LRU_CLOCK_RESOLUTION, the cached server.lruclock is read; otherwise the
 * value is computed on the spot with getLRUClock(). */
unsigned int LRU_CLOCK(void) {
    unsigned int lruclock;
    if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
        atomicGet(server.lruclock,lruclock);
    } else {
        lruclock = getLRUClock();
    }
    return lruclock;
}
复制代码

If it is refreshed less than once per second, the getLRUClock function is used to compute lruclock in real time:

/* Compute the LRU clock from the current time. */
unsigned int getLRUClock(void) {
    // mstime() returns the Unix timestamp in milliseconds.
    // Dividing by LRU_CLOCK_RESOLUTION (1000) converts it to seconds; the
    // mask keeps the result within [0, LRU_CLOCK_MAX]
    return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
}
复制代码

Here LRU_CLOCK_MAX is the largest value lru_clock can take, which is the same as the largest value the lru field of redisObject can hold. It is defined as follows:

/* Largest value that fits in the LRU_BITS-wide lru field. */
#define LRU_CLOCK_MAX ((1<<LRU_BITS)-1)
复制代码

So in the final comparison, both lru_clock and robj.lru are in the range [0, LRU_CLOCK_MAX]. Logically, the current timestamp should always be greater than the timestamp of the last access, so the normal rule is lru_clock - robj.lru. However, because lru_clock and robj.lru are both timestamps reduced modulo the field size, lru_clock may be less than robj.lru; in that case the rule becomes lru_clock + 194 days - robj.lru. It is still impossible to tell when lru_clock and robj.lru are more than 194 days apart, so some inaccurate evictions remain possible.

Putting the above logic together gives the function that computes the idle value under the LRU algorithm:

// src/evict.c

// Compute, with one-second precision, how long ago the object was last
// accessed, and return the interval converted to milliseconds
unsigned long long estimateObjectIdleTime(robj *o) {
    unsigned long long lruclock = LRU_CLOCK();
    if (lruclock >= o->lru) {
        return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
    } else {
        // The clock is behind the stored value, i.e. it wrapped around: add
        // the distance from o->lru up to LRU_CLOCK_MAX to the current clock
        return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
                    LRU_CLOCK_RESOLUTION;
    }
}
复制代码

In Redis 3.0, when the number of samples is set to 10, the effect is very close to the traditional LRU algorithm. (Figure: Screen Shot 2021-03-04 at 4.05.09 PM.png)

LFU elimination strategy

The LFU algorithm reuses the robj.lru field and splits this 24-bit field into two parts:

  • ldt (last decrement time, unit: minute): the high 16 bits of the lru field, holding the access timestamp of the data; it can represent at most about 45 days.
  • counter: the low 8 bits of the lru field, indicating how frequently the data is accessed

Incremental strategy

The maximum value that counter can represent is 255, so the relationship between counter and the number of visits cannot be linear. The calculation steps used here are as follows:

  1. Randomly take a random number r between 0 and 1
  2. Compare r with 1/((counter-LFU_INIT_VAL)*lfu_log_factor+1), where LFU_INIT_VAL is a constant with a default of 5 and lfu_log_factor is a configurable parameter with a default of 10
  3. If r is the smaller of the two, the counter is increased by 1; otherwise the counter remains unchanged

The implementation code is as follows:

/* Probabilistically increment an LFU counter: the further the counter is
 * above LFU_INIT_VAL, the smaller the chance of an increment. Returns the
 * (possibly incremented) counter. */
uint8_t LFULogIncr(uint8_t counter) {
    // The counter has already reached 255 and cannot grow further
    if (counter == 255) return 255;
    double r = (double)rand()/RAND_MAX;
    double baseval = counter - LFU_INIT_VAL; // LFU_INIT_VAL is 5
    if (baseval < 0) baseval = 0;
    // Increment probability: 1 / (baseval * lfu_log_factor + 1)
    double p = 1.0/(baseval*server.lfu_log_factor+1);
    if (r < p) counter++;
    return counter;
}
复制代码

The relationship between the number of visits and the counter value is approximately logarithmic. The larger the counter value, the lower the growth rate

// https://redis.io/topics/lru-cache

+--------+------------+------------+------------+------------+------------+
| factor | 100 hits   | 1000 hits  | 100K hits  | 1M hits    | 10M hits   |
+--------+------------+------------+------------+------------+------------+
| 0      | 104        | 255        | 255        | 255        | 255        |
+--------+------------+------------+------------+------------+------------+
| 1      | 18         | 49         | 255        | 255        | 255        |
+--------+------------+------------+------------+------------+------------+
| 10     | 10         | 18         | 142        | 255        | 255        |
+--------+------------+------------+------------+------------+------------+
| 100    | 8          | 11         | 49         | 143        | 255        |
+--------+------------+------------+------------+------------+------------+
复制代码

Attenuation strategy

In addition to the need to increase the counter when accessing the object, the value of the counter should be decreased correspondingly for objects that have not been accessed for a period of time. The rate of decrease is controlled by the lfu-decay-time parameter. The counter attenuation steps are as follows:

  1. Take the low 16 bits of the current timestamp (in minutes), called now, and compute its difference from ldt. This has the same problem as computing lru_clock - robj.lru in the LRU algorithm: since ldt can represent at most 45 days, if the object was last accessed more than 45 days ago the interval cannot be computed accurately.
/* Return the object's LFU counter after applying time-based decay. The
 * object itself is not modified here. */
unsigned long LFUDecrAndReturn(robj *o) {
    // High 16 bits: last decrement time
    unsigned long ldt = o->lru >> 8;
    // Low 8 bits: counter
    unsigned long counter = o->lru & 255;
    // If lfu_decay_time is 0 the counter is not modified; otherwise it is
    // reduced by LFUTimeElapsed(ldt)/lfu_decay_time
    unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
    if (num_periods)
        // Never let the counter go below 0
        counter = (num_periods > counter) ? 0 : counter - num_periods;
    return counter;
}

// Compute the time elapsed since ldt, in the same units as
// LFUGetTimeInMinutes()
unsigned long LFUTimeElapsed(unsigned long ldt) {
    // Current timestamp (unit: minutes)
    unsigned long now = LFUGetTimeInMinutes();
    // Time difference; when now < ldt the 16-bit value is taken to have
    // wrapped around once
    if (now >= ldt) return now-ldt;
    return 65535-ldt+now;
}

// Return the current timestamp in minutes, keeping only the low 16 bits
unsigned long LFUGetTimeInMinutes(void) {
    return (server.unixtime/60) & 65535;
}
复制代码
  2. If lfu_decay_time is 0, the counter is not modified; otherwise the counter is reduced by LFUTimeElapsed(ldt)/lfu_decay_time

For example, when lfu_decay_time is 1, if the object is not accessed for N minutes, then the counter value is reduced by N

Every time an object is accessed, updateLFU is called to update its counter value:

/* Refresh the LFU data of an object: decay the counter, apply the
 * probabilistic increment, then store the current time (in minutes) in the
 * high bits and the counter in the low 8 bits of lru. */
void updateLFU(robj *val) {
    unsigned long counter = LFUDecrAndReturn(val);
    counter = LFULogIncr(counter);
    val->lru = (LFUGetTimeInMinutes()<<8) | counter;
}
复制代码

Implement elimination

When Redis needs to evict a batch of data, it calls evictionPoolPopulate to obtain a set of candidate objects. Depending on the configured eviction scope, the sampledict argument passed to evictionPoolPopulate is either db->dict, which holds all the data, or db->expires, which holds only the keys with an expiration time.

/* Sample keys from `sampledict` and merge them into the eviction pool, which
 * is kept sorted by ascending idle score (a larger idle score means the key
 * should be evicted sooner). Excerpt: declarations and the code that fills
 * the chosen pool slot are elided.
 *
 * dbid       - id of the database being sampled (used only in elided code).
 * sampledict - dictionary to sample from: db->dict or db->expires.
 * keydict    - dictionary in which the value object of a sampled key is
 *              looked up when it differs from sampledict.
 * pool       - array of EVPOOL_SIZE entries; key == NULL marks an empty slot.
 */
void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
    int j, k, count;
    dictEntry *samples[server.maxmemory_samples];

    // Randomly fetch up to server.maxmemory_samples entries into samples
    count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
    // Process every sampled entry
    for (j = 0; j < count; j++) {
        // ...
        // Initialization
        // ...

        de = samples[j];
        key = dictGetKey(de);

        // Unless the policy is volatile-ttl, the value object is needed. If
        // the sample came from a dictionary other than keydict (e.g.
        // db->expires), look the key up in keydict first
        if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
            if (sampledict != keydict) de = dictFind(keydict, key);

            // #define dictGetVal(he) ((he)->v.val)
            // This relies on the union in dictEntry: for an entry of db->dict
            // v holds the key's value object, for an entry of db->expires it
            // holds the expiration time
            o = dictGetVal(de);
        }

        // Compute the idle score for the policy in use; the larger the idle
        // score, the sooner the key should be evicted.
        //
        // LRU policies: use the object's idle time
        if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
            idle = estimateObjectIdleTime(o);
        // LFU policies: invert the decayed access counter
        } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
            idle = 255-LFUDecrAndReturn(o);
        // volatile-ttl: ULLONG_MAX minus the expiration time, so that keys
        // expiring sooner get a larger score
        } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
            idle = ULLONG_MAX - (long)dictGetVal(de);
        } else {
            serverPanic("Unknown eviction policy in evictionPoolPopulate()");
        }

        k = 0;
        // Skip the occupied slots whose idle score is smaller than this
        // key's: k ends at the first slot that is empty or holds an idle
        // score >= idle (or at EVPOOL_SIZE if there is none)
        while (k < EVPOOL_SIZE &&
               pool[k].key &&
               pool[k].idle < idle) k++;
        if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
            // The pool is full and this key's idle score is not larger than
            // that of the worst candidate in it: the key does not enter the pool
            continue;
        } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
            // Every occupied slot holds a smaller idle score and slot k is
            // empty: store the key there, no shifting needed
        } else {
            //                    current key
            //                       |
            //                       V
            // Pool: [ 0 1 2 3 ...k-1 k ... EVPOOL_SIZE-1]
            // Insert in the middle so that the pool stays sorted by ascending
            // idle score
            if (pool[EVPOOL_SIZE-1].key == NULL) {
                // There is free space on the right: shift the entries from k
                // onwards one slot to the right and insert at k
                memmove(pool+k+1,pool+k,
                    sizeof(pool[0])*(EVPOOL_SIZE-k-1));
            } else {
                // No free space on the right: insert at k-1 instead. Without
                // this decrement the memmove below would read one element
                // past the pool when k == EVPOOL_SIZE, and the new entry
                // would be written at the wrong index
                k--;
                // Shift entries 1..k one slot to the left, which discards
                // pool[0], the entry with the smallest idle score. Its cached
                // buffer is saved first because the memmove overwrites it
                // (presumably it is re-attached in the elided code below)
                sds cached = pool[0].cached;
                memmove(pool,pool+1,sizeof(pool[0])*k);
            }
        }
        // ...
        // Copy the attributes of the current key into the entry at index k
        // ...
    }
}
复制代码

After the above operations, the pool holds the newly filtered eviction candidates. freeMemoryIfNeeded calls evictionPoolPopulate to select the objects to evict, and deletes them one at a time until enough memory has been released. If the memory requirement still cannot be met, an error is reported.


Author: klew
link: https: //juejin.cn/post/6935834729295904775

Guess you like

Origin blog.csdn.net/qq_46388795/article/details/114393791