Deleted range
- Expired key
- When memory is full and a `set` command is executed while no key has expired, a key selected by the cache eviction policy is deleted
Expired delete
Keys that have an expiration time set are stored separately in Redis, in the `expires` dict of each `redisDb`:
// Per-database state (excerpt; other fields elided).
typedef struct redisDb {
dict *dict; // all key-value pairs
dict *expires; // keys that have an expiration time set
// ...
} redisDb;
复制代码
Set validity period
Redis has 4 commands that set the expiration time of a key: `expire`, `pexpire`, `expireat` and `pexpireat`
Set relative time
`expire <key> <ttl>`: sets the expiration time of `key` to `ttl` seconds from now.
// src/expire.c
/* EXPIRE key seconds
 * Relative expire in seconds: passes the current time (mstime()) as the
 * base time and UNIT_SECONDS as the unit to expireGenericCommand. */
void expireCommand(client *c) {
expireGenericCommand(c,mstime(),UNIT_SECONDS);
}
复制代码
`pexpire <key> <ttl>`: sets the expiration time of `key` to `ttl` milliseconds from now.
// src/expire.c
/* PEXPIRE key milliseconds
 * Relative expire in milliseconds: passes the current time (mstime()) as
 * the base time and UNIT_MILLISECONDS as the unit to expireGenericCommand. */
void pexpireCommand(client *c) {
expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
}
复制代码
Set absolute time
`expireat <key> <timestamp>`: sets the expiration time of `key` to the specified `timestamp`, given in seconds.
// src/expire.c
/* EXPIREAT key time
 * Absolute expire in seconds: passes 0 as the base time, so the argument
 * itself is the expiration timestamp, with UNIT_SECONDS as the unit. */
void expireatCommand(client *c) {
expireGenericCommand(c,0,UNIT_SECONDS);
}
复制代码
`pexpireat <key> <timestamp>`: sets the expiration time of `key` to the specified `timestamp`, given in milliseconds.
// src/expire.c
/* PEXPIREAT key ms_time
 * Absolute expire in milliseconds: passes 0 as the base time, so the
 * argument itself is the expiration timestamp, with UNIT_MILLISECONDS as
 * the unit. */
void pexpireatCommand(client *c) {
expireGenericCommand(c,0,UNIT_MILLISECONDS);
}
复制代码
All 4 commands above end up calling the generic function `expireGenericCommand`:
// src/expire.c
/* Shared implementation of EXPIRE, PEXPIRE, EXPIREAT and PEXPIREAT (excerpt).
 * basetime is added to the parsed time argument; unit is compared against
 * UNIT_SECONDS to decide whether the argument must be scaled to ms. */
void expireGenericCommand(client *c, long long basetime, int unit) {
robj *key = c->argv[1], *param = c->argv[2];
// parse the time argument into `when`; return early if it cannot be read
long long when;
if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK)
return;
// convert the time to milliseconds
if (unit == UNIT_SECONDS) when *= 1000;
when += basetime;
// on a master node, if the expire time is not later than the current time,
// the command is turned into a DEL
if (when <= mstime() && !server.loading && !server.masterhost) {
robj *aux;
int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) :
dbSyncDelete(c->db,key);
// ...
// propagate the delete command to the slaves and the AOF
// ...
} else {
// set the expire time
setExpire(c,c->db,key,when);
// ...
// build the reply and publish the key-modified notification
// ...
return;
}
}
复制代码
The expiration time is actually set by `setExpire`, which stores it in the `s64` member of `union v` of the `dictEntry`:
// src/db.c
/* Record `when` as the expire time of `key` in db->expires (excerpt). */
void setExpire(client *c, redisDb *db, robj *key, long long when) {
dictEntry *kde, *de;
// find the entry in db->dict; the lookup follows the same logic as a GET:
// locate the bucket via hashFunc(key) & sizemask, then walk the chain
// inside that bucket
kde = dictFind(db->dict,key->ptr);
// find the matching entry in db->expires, creating one if there is none
de = dictAddOrFind(db->expires,dictGetKey(kde));
// store the expire time in the entry's v.s64 member
dictSetSignedIntegerVal(de,when);
// ...
}
// Store _val_ in the signed 64-bit member (v.s64) of a dict entry.
#define dictSetSignedIntegerVal(entry, _val_) \
do { (entry)->v.s64 = _val_; } while(0)
复制代码
Each `dictEntry` in `db->expires` stores a key that has an expiration time together with that expiration time; the expiration time is kept in the union `v`. As the definition below shows, `v` has different meanings in different usage scenarios within Redis:
// One hash-table entry: a key, a value slot, and a link to the next entry.
typedef struct dictEntry {
void *key;
// value slot; which member is meaningful depends on how the dict is used
// (db->expires stores the expire time in s64)
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
// next entry in the same bucket's chain
struct dictEntry *next;
} dictEntry;
复制代码
Query expiration time
`ttl key`: returns the remaining time to live of `key`, in seconds.
// src/expire.c
/* TTL key
 * Calls ttlGenericCommand with output_ms = 0, i.e. the reply is in seconds. */
void ttlCommand(client *c) {
ttlGenericCommand(c, 0);
}
复制代码
`pttl key`: returns the remaining time to live of `key`, in milliseconds.
// src/expire.c
/* PTTL key
 * Calls ttlGenericCommand with output_ms = 1, i.e. the reply is in
 * milliseconds. */
void pttlCommand(client *c) {
ttlGenericCommand(c, 1);
}
复制代码
Both commands above end up calling the generic function `ttlGenericCommand`:
// src/expire.c
/* Implements TTL and PTTL */
void ttlGenericCommand(client *c, int output_ms) {
// ...
// handle the case where the key does not exist
// ...
// fetch the expire time; getExpire returns -1 when no expire time is set
expire = getExpire(c->db,c->argv[1]);
if (expire != -1) {
ttl = expire-mstime();
if (ttl < 0) ttl = 0;
}
if (ttl == -1) {
addReplyLongLong(c,-1);
} else {
// reply in the requested unit; for seconds, (ttl+500)/1000 rounds to the
// nearest second
addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
}
}
复制代码
The expiration time is fetched by `getExpire`, which looks the key up in `db->expires` and returns the `s64` member of `union v` of the entry it finds:
// src/expire.c
/* Return the absolute expiration time stored for `key` in db->expires,
 * or -1 when the key has no expiration time set. */
long long getExpire(redisDb *db, robj *key) {
    dictEntry *entry;

    /* An empty expires dict means no key can have an expiration time. */
    if (dictSize(db->expires) == 0) return -1;

    /* No entry for this key: no expiration time was set, report -1. */
    entry = dictFind(db->expires,key->ptr);
    if (entry == NULL) return -1;

    /* The expire time lives in the entry's v.s64 member. */
    return dictGetSignedIntegerVal(entry);
}
// Read the signed 64-bit member (v.s64) of a dict entry.
#define dictGetSignedIntegerVal(he) ((he)->v.s64)
复制代码
Expiration strategy
Redis combines lazy deletion with periodic scanning to implement expiration
Lazy deletion
`expireIfNeeded` is called on every access to determine whether the key has expired. If it has expired, the key is deleted; otherwise the value corresponding to the key is returned. Using this strategy alone may waste a lot of memory.
// src/db.c
/* Lazy expiration check performed when a key is accessed.
 *
 * Returns 0 when the key has no expire time, when the server is loading
 * data, or when the key has not expired yet. On a slave it only reports
 * whether the key is past its expire time, without deleting it. Otherwise
 * the expired key is deleted and the result of the delete call is returned. */
int expireIfNeeded(redisDb *db, robj *key) {
    mstime_t expire_at = getExpire(db,key);
    mstime_t now;

    /* No expire time set, or data is being loaded from disk: do nothing. */
    if (expire_at < 0 || server.loading) return 0;

    /* See GitHub Issue #1525: while a Lua script runs on the master it may
     * branch on whether a key exists. To keep such checks consistent within
     * one script, the script start time is used as the current time. */
    if (server.lua_caller) {
        now = server.lua_time_start;
    } else {
        now = mstime();
    }

    /* On a slave: report whether the key is expired, but never delete it. */
    if (server.masterhost != NULL) return now > expire_at;

    /* Key is still valid. */
    if (now <= expire_at) return 0;

    /* Count the expired key. */
    server.stat_expiredkeys++;
    /* Send a DEL to all slaves and to the AOF file. */
    propagateExpire(db,key,server.lazyfree_lazy_expire);
    /* Publish a key-expired message on the keyspace channel. */
    notifyKeyspaceEvent(NOTIFY_EXPIRED,"expired",key,db->id);

    /* Delete synchronously or asynchronously depending on configuration
     * (async only drops the reference; a background thread frees memory). */
    if (server.lazyfree_lazy_expire) return dbAsyncDelete(db,key);
    return dbSyncDelete(db,key);
}
复制代码
Special treatment
- When the Lua script is executed on the master node
See GitHub Issue #1525. On the master, a Lua script may use the existence of a key as a condition while it runs. To keep such conditions consistent within a single script, the current time is pinned to the script's start time. Without this treatment, repeated runs of the following Lua script, `/tmp/myscript.lua`, could produce inconsistent results:
-- /tmp/myscript.lua
-- Checks twice whether KEYS[1] exists; each time it does, "mycounter" is
-- incremented. The second branch returns the incremented value.
if redis.call("exists",KEYS[1]) == 1
then
redis.call("incr","mycounter")
end
if redis.call("exists",KEYS[1]) == 1
then
return redis.call("incr","mycounter")
end
复制代码
The issue can be reproduced with the following bash script:
# Loop forever: set a short-lived key, run the script just before the key
# expires, then compare "mycounter" on two instances.
while [ 1 ]
do
# set key x with a 100 ms expiry
redis-cli set x foo px 100 > /dev/null
# wait until shortly before the key expires
sleep 0.092
redis-cli --eval /tmp/myscript.lua x > /dev/null
sleep 0.1
# read the counter from the default instance and from the one on port 6380
# (presumably the slave; confirm against the test setup)
redis-cli get mycounter
redis-cli -p 6380 get mycounter
done
复制代码
- For slave node
On a slave node, keys are deleted by the `DEL` command sent from the master, so here only the expiration result is returned to the client and the delete operation is not executed
- This step is skipped while data is being loaded from RDB and AOF
Scan regularly
The system scans periodically and deletes expired keys. Using this strategy alone may leave keys that have expired but have not yet been deleted
By default, Redis runs an expired-key scan every 100 ms (configurable through the `hz` parameter; the execution cycle is 1s/hz). Because keys with an expiration time are stored separately in redisDb, the scan never has to go through all keys. The actual work is done by the `activeExpireCycle` function.
Background operations such as activeExpireCycle and incrementallyRehash are all triggered by databasesCron
/* Periodic expired-key scan over the databases (excerpt). */
void activeExpireCycle(int type) {
// ...
// visit each db in turn
for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
int expired;
redisDb *db = server.db+(current_db % server.dbnum);
// remember the next db to process, so that if this call exits early because
// of the time limit, the next call continues from that db and the cleanup
// work is spread evenly across all dbs
current_db++;
do {
// ...
// skip cases that need no work, e.g. no key has an expire time set
// ...
// sample size, 20 by default
if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
// randomly sample up to 20 of the keys that have an expire time set
while (num--) {
dictEntry *de;
long long ttl;
// pick a random key from the dict
if ((de = dictGetRandomKey(db->expires)) == NULL) break;
ttl = dictGetSignedIntegerVal(de)-now;
// try to delete it; the deletion itself is similar to lazy deletion
if (activeExpireCycleTryExpire(db,de,now)) expired++;
// ...
}
// ...
// update statistics and similar bookkeeping
// ...
// if more than 25% of the sample was expired, the share of expired keys is
// high, so run another round
} while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
}
// ...
}
复制代码
Random sampling is performed by `dictGetRandomKey`:
// src/dict.c
/* Return a random entry from the hash table, or NULL when the table holds
 * no entries. Useful to implement randomized algorithms. */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *entry, *head;
    unsigned long slot;
    int chainlen, pick;

    /* Empty dict: return NULL (the expire cycle stops sampling on NULL). */
    if (dictSize(d) == 0) return NULL;

    /* If a rehash is in progress (per rehashidx), perform one step first. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    if (dictIsRehashing(d)) {
        /* Still rehashing, so both tables must be considered. Slots of
         * ht[0] below rehashidx are never drawn; the index ranges over
         * ht[0] from rehashidx onward plus all of ht[1]. */
        do {
            slot = d->rehashidx + (random() % (d->ht[0].size +
                                               d->ht[1].size -
                                               d->rehashidx));
            if (slot >= d->ht[0].size)
                entry = d->ht[1].table[slot - d->ht[0].size];
            else
                entry = d->ht[0].table[slot];
        } while (entry == NULL); /* retry on an empty bucket */
    } else {
        /* Same index rule as insertion (hashFunc(key) & sizemask):
         * random() & sizemask picks a random slot. */
        do {
            slot = random() & d->ht[0].sizemask;
            entry = d->ht[0].table[slot];
        } while (entry == NULL); /* retry on an empty bucket */
    }

    /* `entry` is now the head of a non-empty bucket chain; count its
     * length so one element of the chain can be chosen at random. */
    head = entry;
    chainlen = 0;
    for (; entry != NULL; entry = entry->next) chainlen++;

    /* Pick a random position in the chain and walk to it. */
    pick = random() % chainlen;
    for (entry = head; pick > 0; pick--) entry = entry->next;
    return entry;
}
复制代码
Cache elimination
Configure the maximum memory limit
Configure in redis.conf
The configuration file and command line parameters are loaded when the redis server starts, and `maxmemory` is stored in the maxmemory field of the server object
main
`main` performs initialization and other operations when the redis server starts, and calls the `loadServerConfig` function to load the configuration file
// src/server.c
/* Server entry point (excerpt): loads the configuration and warns about a
 * very small maxmemory value. */
int main(int argc, char **argv) {
// ..
// load the configuration
loadServerConfig(configfile,options);
// ..
// warn when the configured limit is non-zero but below 1MB
if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
}
}
复制代码
loadServerConfig
Loads the configuration file (or stdin) and the command line parameters into the `config` string, and then calls `loadServerConfigFromString`
// src/config.c
/* Build one configuration string from the config file (or stdin) followed
 * by the extra command-line options, then pass it to
 * loadServerConfigFromString. Exits the process if the file cannot be
 * opened. */
void loadServerConfig(char *filename, char *options) {
    sds cfg = sdsempty();
    char line[CONFIG_MAX_LINE+1];

    /* Read the configuration file, if one was given. */
    if (filename != NULL) {
        FILE *in;
        /* "./redis-server -" reads the config from stdin (EOF via <C-D>). */
        int from_stdin = (filename[0] == '-' && filename[1] == '\0');

        if (from_stdin) {
            in = stdin;
        } else {
            /* Anything else is treated as a path to open. */
            in = fopen(filename,"r");
            if (in == NULL) {
                serverLog(LL_WARNING,
                    "Fatal error, can't open config file '%s'", filename);
                exit(1);
            }
        }

        /* Append every line of the input to the config string. */
        while (fgets(line,CONFIG_MAX_LINE+1,in) != NULL)
            cfg = sdscat(cfg,line);

        if (!from_stdin) fclose(in);
    }

    /* Append the remaining options (e.g. the arguments following
     * "./redis-server --port 8080") on a new line. */
    if (options != NULL) {
        cfg = sdscat(cfg,"\n");
        cfg = sdscat(cfg,options);
    }

    loadServerConfigFromString(cfg);
    sdsfree(cfg);
}
复制代码
loadServerConfigFromString
Reads the configuration line by line from the `config` string built in the previous step and writes it to the `server` object
// src/config.c
/* Parse the configuration string line by line and store the settings in
 * the server object (excerpt). */
void loadServerConfigFromString(char *config) {
// ...
// split the configuration into lines
lines = sdssplitlen(config,strlen(config),"\n",1,&totlines);
for (i = 0; i < totlines; i++) {
// skip invalid lines and comments
// ...
argv = sdssplitargs(lines[i],&argc);
// convert the directive name to lowercase
sdstolower(argv[0]);
// apply the setting that matches the directive name (compared with strcasecmp)
if (!strcasecmp(argv[0],"timeout") && argc == 2) {
server.maxidletime = atoi(argv[1]);
if (server.maxidletime < 0) {
err = "Invalid timeout value"; goto loaderr;
}
// ...
} else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
// memtoll converts a memory size given as a string to the corresponding
// long long value, e.g. 1kb -> 1024
server.maxmemory = memtoll(argv[1],NULL);
}
}
}
复制代码
Configure with the `CONFIG SET` command
When the Redis server receives a `CONFIG SET` command from a client, it calls the `configSetCommand` function. On receipt, the server stores the command and its arguments in `argc` and `argv`:
argc: 4
argv: 0 1 2 3
config set maxmemory 10mb
复制代码
Dynamically setting `maxmemory` immediately attempts to trigger memory reclamation, whereas modifying other memory-related settings (`maxmemory_policy`, for example) does not:
// Fragment of the CONFIG SET handling for "maxmemory" (excerpt).
if (0) {
// ...
} config_set_memory_field("maxmemory",server.maxmemory) {
// a non-zero value means a memory limit is in effect
if (server.maxmemory) {
// warn when the new limit is already below the current memory usage
if (server.maxmemory < zmalloc_used_memory()) {
serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET is smaller than the current memory usage. This will result in keys eviction and/or inability to accept new write commands depending on the maxmemory-policy.");
}
// try to free memory right away
freeMemoryIfNeeded();
}
// ...
}
复制代码
Memory limitations of 32-bit machines
On a 64-bit machine, `maxmemory` set to 0 means memory is not limited. A 32-bit machine, however, has at most 4 GB of address space, so there the default memory limit is set to 3 GB and the eviction policy is set to `noeviction`
// src/server.c
// ...
// On a 32-bit instance with no limit configured, apply a 3 GB limit with
// the no-eviction policy and log a warning.
if (server.arch_bits == 32 && server.maxmemory == 0) {
serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
}
复制代码
Elimination strategy
The eviction policy is configured with `CONFIG SET maxmemory-policy`.
Default:
- **noeviction**: once memory is full, an error is returned directly for commands such as `set`
For all keys:
- allkeys-lru: Use the LRU algorithm to perform deletion within the range of all keys. If the memory is still insufficient, an error will be reported
- **allkeys-lfu**: Use the LFU algorithm to delete within the range of all keys. If the memory is still insufficient, an error will be reported
- **allkeys-random: **Randomly delete within the range of all keys. If the memory is still not enough, an error will be reported
For the key with expiration time set:
- **volatile-lru: **Use the LRU algorithm to delete the key with the expiration time set. If the memory is still insufficient, an error will be reported
- **volatile-lfu**: Use the LFU algorithm to delete among the keys with an expiration time set. If the memory is still insufficient, an error will be reported
- **volatile-random: **Randomly delete the key with the expiration time set. If the memory is still not enough, an error will be reported
- **volatile-ttl: ** Delete the key that is about to expire, if the memory is still not enough, an error will be reported
Before evicting, Redis computes an `idle` value for a sample of objects. How the value is computed depends on the eviction policy in use. The larger the `idle` value, the higher the object's priority for deletion. The following mainly describes how the `idle` value is computed for LRU and LFU
LRU elimination strategy
Deletion is sample-based. The number of samples is controlled by `CONFIG SET maxmemory-samples 100`, which corresponds to the `maxmemory_samples` field of the server object; the larger the number of samples, the closer the behavior is to the traditional LRU algorithm
Optimization Strategy
Traditional LRU implementations usually rely on a hashmap plus a linked list. To avoid that overhead, Redis makes the following optimizations:
- An `lru` field in the redisObject structure records the access timestamp of the data, instead of adjusting the position of the object in a linked list on each access
// Redis object header.
typedef struct redisObject {
// object type
unsigned type:4;
// object encoding
unsigned encoding:4;
// the lru field is shared by the LRU and LFU algorithms
//
// LRU_BITS defaults to 24, so at most about 194 days of timestamps can be
// stored; the field is written when the object is created and updated when
// the object is accessed; once the maximum is exceeded, counting restarts
// from 0
unsigned lru:LRU_BITS;
int refcount;
void *ptr;
} robj;
复制代码
- A sampled array is used instead of the linked list, and the candidate set is then filtered by the value of the lru field, which avoids the overhead of a linked list. Objects in the candidate set are represented by `evictionPoolEntry`:
// One candidate in the eviction pool (excerpt).
struct evictionPoolEntry {
unsigned long long idle; // used to order candidates for eviction; its meaning differs per algorithm
sds key; // name of the key
// ...
};
复制代码
Calculation method
The global `lru_clock` records the current Unix timestamp. It is refreshed by `serverCron` calling `updateCachedTime`, by default every 100 ms. The refresh frequency depends on the `hz` parameter; the refresh interval is 1s/hz.
The value of `LRU_CLOCK_RESOLUTION` is 1000, so when `lru_clock` is obtained through the `LRU_CLOCK` function, the `lruclock` cached in the global variable is used if it is refreshed at least once per second:
/* Return the current LRU clock value.
 *
 * When the refresh period derived from server.hz (1000/hz ms) does not
 * exceed LRU_CLOCK_RESOLUTION, the cached server.lruclock is read;
 * otherwise the clock is computed on the spot with getLRUClock(). */
unsigned int LRU_CLOCK(void) {
    unsigned int lru_now;

    if (1000/server.hz > LRU_CLOCK_RESOLUTION) {
        /* Cached value is refreshed too rarely: compute in real time. */
        lru_now = getLRUClock();
    } else {
        /* Cached value is fresh enough: read it. */
        atomicGet(server.lruclock,lru_now);
    }
    return lru_now;
}
复制代码
If the refresh frequency is lower than once per second, the `getLRUClock` function is used to compute `lruclock` in real time:
/* Compute the LRU clock from the current time. */
unsigned int getLRUClock(void) {
// mstime() returns the unix timestamp in milliseconds;
// dividing by LRU_CLOCK_RESOLUTION (value 1000) converts it to seconds,
// and the mask keeps the result within [0, LRU_CLOCK_MAX]
return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
}
复制代码
Here `LRU_CLOCK_MAX` is the largest value `lru_clock` can take. It equals the largest value the `lru` field of redisObject can hold, and is defined as follows:
// Largest value representable in LRU_BITS bits.
#define LRU_CLOCK_MAX ((1<<LRU_BITS)-1)
复制代码
So in the final comparison, both `lru_clock` and `robj.lru` lie in the range [0, LRU_CLOCK_MAX]. Logically, the current timestamp should always be greater than the timestamp of the last access, so the normal rule is `lru_clock - robj.lru`. However, since `lru_clock` and `robj.lru` are both timestamps taken modulo the field size, `lru_clock` may be smaller than `robj.lru`; in that case the rule becomes `lru_clock + 194 days - robj.lru`. Even so, the result cannot be correct when `lru_clock` and `robj.lru` are more than 194 days apart, so inaccurate evictions can still occur.
Putting the above together gives the function that computes the `idle` value under the LRU algorithm:
// src/evict.c
/* Estimate how long the object has been idle since its last access.
 * The gap is computed in LRU clock units and then multiplied by
 * LRU_CLOCK_RESOLUTION to return it in milliseconds. */
unsigned long long estimateObjectIdleTime(robj *o) {
    unsigned long long now = LRU_CLOCK();
    unsigned long long elapsed;

    if (now < o->lru) {
        /* The clock is behind the stored stamp, i.e. it wrapped around:
         * add the distance from the stamp up to LRU_CLOCK_MAX. */
        elapsed = now + (LRU_CLOCK_MAX - o->lru);
    } else {
        elapsed = now - o->lru;
    }
    return elapsed * LRU_CLOCK_RESOLUTION;
}
复制代码
In Redis 3.0, when the number of samples is set to 10, the effect is very close to the traditional LRU algorithm
LFU elimination strategy
The LFU algorithm reuses the `robj.lru` field and splits this 24-bit field into two parts:
- ldt (last decrement time, unit: minute): the high 16 bits of the lru field, holding the access timestamp of the data; it can cover at most about 45 days.
- counter: the low 8 bits of the lru field, indicating how frequently the data is accessed
Incremental strategy
The maximum value that counter can represent is 255, so the relationship between counter and the number of visits cannot be linear. The calculation steps used here are as follows:
- Randomly take a random number r between 0 and 1
- Compare r with the size of 1/((counter-LFU_INIT_VAL)*lfu_log_factor+1), where LFU_INIT_VAL is a constant and the default is 5, and lfu_log_factor is a configurable parameter, and the default is 10
- If r is small, the counter is increased by 1, otherwise the counter remains unchanged
The implementation code is as follows:
/* Probabilistically increment an LFU counter.
 *
 * The counter is incremented with probability
 * 1/((counter-LFU_INIT_VAL)*server.lfu_log_factor+1), where the difference
 * is clamped at 0, so larger counters grow more slowly. A counter that has
 * reached 255 is returned unchanged. */
uint8_t LFULogIncr(uint8_t counter) {
    double r, baseval, p;

    /* Already saturated: cannot grow any further. */
    if (counter == 255) return 255;

    /* Random number in [0, 1]. */
    r = (double)rand()/RAND_MAX;

    /* Distance above the initial value, never negative. */
    baseval = counter - LFU_INIT_VAL;
    if (baseval < 0) baseval = 0;

    p = 1.0/(baseval*server.lfu_log_factor+1);
    return (r < p) ? counter+1 : counter;
}
复制代码
The relationship between the number of visits and the counter value is approximately logarithmic. The larger the counter value, the lower the growth rate
// https://redis.io/topics/lru-cache
+--------+------------+------------+------------+------------+------------+
| factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits |
+--------+------------+------------+------------+------------+------------+
| 0 | 104 | 255 | 255 | 255 | 255 |
+--------+------------+------------+------------+------------+------------+
| 1 | 18 | 49 | 255 | 255 | 255 |
+--------+------------+------------+------------+------------+------------+
| 10 | 10 | 18 | 142 | 255 | 255 |
+--------+------------+------------+------------+------------+------------+
| 100 | 8 | 11 | 49 | 143 | 255 |
+--------+------------+------------+------------+------------+------------+
复制代码
Attenuation strategy
In addition to the need to increase the counter when accessing the object, the value of the counter should be decreased correspondingly for objects that have not been accessed for a period of time. The rate of decrease is controlled by the lfu-decay-time
parameter. The counter attenuation steps are as follows:
- Take the low 16 bits of the current timestamp (in minutes) as `now`, and compute the difference between `now` and `ldt`. The same problem arises here as when computing the difference between `lru_clock` and `robj.lru` in the LRU algorithm: since ldt can represent at most 45 days, if the object was last accessed more than 45 days ago, the time elapsed since that access cannot be computed accurately.
/* Return the object's LFU counter after applying time-based decay.
 *
 * The high 16 bits of o->lru hold the last decrement time and the low
 * 8 bits hold the counter. The counter is reduced by
 * LFUTimeElapsed(ldt)/server.lfu_decay_time, never going below 0; when
 * lfu_decay_time is 0 no decay is applied. The object is not modified. */
unsigned long LFUDecrAndReturn(robj *o) {
    unsigned long ldt = o->lru >> 8;       /* high 16 bits */
    unsigned long counter = o->lru & 255;  /* low 8 bits */
    unsigned long periods = 0;

    if (server.lfu_decay_time)
        periods = LFUTimeElapsed(ldt) / server.lfu_decay_time;

    if (periods == 0) return counter;
    /* Clamp at 0 instead of letting the unsigned value wrap. */
    if (periods > counter) return 0;
    return counter - periods;
}
/* Return the time elapsed since `ldt`, in the units returned by
 * LFUGetTimeInMinutes(). When the current value is smaller than `ldt`
 * the clock is taken to have wrapped around once. */
unsigned long LFUTimeElapsed(unsigned long ldt) {
    /* Current timestamp, in minutes. */
    unsigned long now = LFUGetTimeInMinutes();

    return (now < ldt) ? 65535-ldt+now : now-ldt;
}
// Return the current timestamp in minutes (server.unixtime/60), keeping
// only the low 16 bits (mask 65535)
unsigned long LFUGetTimeInMinutes(void) {
return (server.unixtime/60) & 65535;
}
复制代码
- If lfu_decay_time is 0, the counter is not modified; otherwise the counter is reduced by LFUTimeElapsed(ldt)/lfu_decay_time
For example, when lfu_decay_time is 1, if the object is not accessed for N minutes, then the counter value is reduced by N
On every access to an object, `updateLFU` is called to update the counter value:
/* Refresh the LFU data of an object: decay the counter, apply the
 * probabilistic increment, and store the current time in minutes (high
 * bits) together with the new counter (low 8 bits) back into val->lru. */
void updateLFU(robj *val) {
unsigned long counter = LFUDecrAndReturn(val);
counter = LFULogIncr(counter);
val->lru = (LFUGetTimeInMinutes()<<8) | counter;
}
复制代码
Implement elimination
When Redis needs to evict a batch of data, it calls `evictionPoolPopulate` to collect a set of objects to delete. Depending on the configured eviction scope, the `sampledict` argument passed to `evictionPoolPopulate` is either `db->dict`, which holds all data, or `db->expires`, which holds only the data with an expiration time set:
/* Sample entries from sampledict, compute an idle score for each, and merge
 * them into `pool`, which is kept ordered by ascending idle (excerpt). */
void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
int j, k, count;
dictEntry *samples[server.maxmemory_samples];
// randomly fetch up to server.maxmemory_samples entries into samples
count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
// visit each sampled entry
for (j = 0; j < count; j++) {
// ...
// initialization
// ...
de = samples[j];
key = dictGetKey(de);
// unless the policy is volatile-ttl, the value object is needed; if the
// sampled dict is not the key dict (it may be db->expires), the entry is
// first looked up again in the key dict
if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
if (sampledict != keydict) de = dictFind(keydict, key);
// #define dictGetVal(he) ((he)->v.val)
// this relies on the union again: for an entry of db->dict the result is
// the key's value; for an entry of db->expires it is the expire time
o = dictGetVal(de);
}
// compute the idle score according to the policy; the larger the idle
// score, the sooner the object should be evicted
//
// with an LRU policy, use the object's idle time
if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
idle = estimateObjectIdleTime(o);
// with an LFU policy, use 255 minus the decayed counter
} else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
idle = 255-LFUDecrAndReturn(o);
// with volatile-ttl, use ULLONG_MAX minus the expire time as the score
} else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
idle = ULLONG_MAX - (long)dictGetVal(de);
} else {
serverPanic("Unknown eviction policy in evictionPoolPopulate()");
}
k = 0;
// skip the pool entries whose idle is smaller than the current object's;
// k ends at the first slot that is empty or whose idle is not smaller
while (k < EVPOOL_SIZE &&
pool[k].key &&
pool[k].idle < idle) k++;
if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
// no pool entry has a smaller idle than the current object and the pool
// has no free slot, so the current object is not inserted
continue;
} else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
// slot k is empty, so the object can be placed there without shifting
} else {
// current object
// |
// V
// Pool: [ 0 1 2 3 ...k-1 k ... EVPOOL_SIZE-1]
// to keep the pool ordered by ascending idle, room for the current object
// is made around position k
if (pool[EVPOOL_SIZE-1].key == NULL) {
// there is free space on the right, so the elements from k onward are
// shifted one slot to the right
memmove(pool+k+1,pool+k,
sizeof(pool[0])*(EVPOOL_SIZE-k-1));
} else {
// no free space on the right, so elements are shifted one slot to the
// left, overwriting pool[0]
sds cached = pool[0].cached;
memmove(pool,pool+1,sizeof(pool[0])*k);
}
}
// ...
// copy the current object's attributes into the element at index k
// ...
}
}
复制代码
After these operations, the pool holds the freshly filtered objects that are candidates for eviction. `freeMemoryIfNeeded` calls `evictionPoolPopulate` to select the objects to evict and deletes them one at a time until enough memory has been released. If the memory requirement still cannot be met, an error is reported.
Author: klew
link: https: //juejin.cn/post/6935834729295904775