聊聊缓存替换

换出策略

前几天看到别人整理的关于缓存替换的讨论，觉得很是不错。大致有这么几种思路：

随机换出
FIFO (First In First Out) 最先进入缓存的被换出
LRU (Least Recently Used) 最近最少使用的被换出
LFU (Least Frequently Used) 最不频繁使用的被换出

最后两个稍微有点绕，看似是俩不一样的思路，但是仔细想想，一个是基于时间线，另一个基于使用频次。都有一个共同的特性，抽象出来，这俩就是计算规则上的不同罢了，实际上大体也还属于同一种模式。

下面简单用代码走一遍，加深下印象。

随机换出

#coding: utf8
__author__ = "郭 璞"
__email__ = "[email protected]"
# random cache algorithm

import random

class Cacher(object):
    """Fixed-size slot cache.

    Every key is mapped to exactly one slot of ``container``; writing a key
    overwrites whatever value that slot held before.  Only values are stored,
    so two keys that map to the same slot read back the same value.
    """

    def __init__(self, size):
        self.size = size
        self.container = [None] * size
        # Statistics updated by get(): number of non-None and None reads.
        self.hitcnt = 0
        self.misscnt = 0

    def _getrandkey(self, key):
        """Return the slot index for ``key``.

        The index is the sum of the code points of ``str(key)`` modulo
        ``size``, so the same key always lands in the same slot.
        """
        total = 0
        for ch in str(key):
            total += ord(ch)
        return total % self.size

    def cache(self, key, value):
        """Store ``value`` in the slot that ``key`` maps to."""
        slot = self._getrandkey(key)
        self.container[slot] = value

    def get(self, key):
        """Return the value in the slot for ``key`` and update the counters.

        A ``None`` slot counts as a miss; anything else counts as a hit.
        """
        slot = self._getrandkey(key)
        ret = self.container[slot]
        print("method `get` ret: ", ret)
        if ret is not None:
            self.hitcnt += 1
        else:
            self.misscnt += 1
        return ret

    def printself(self):
        """Print all slots followed by the miss and hit counters."""
        print(self.container, "misscnt:", self.misscnt, ", hitcnt: ", self.hitcnt)

if __name__ == "__main__":
    cacher = Cacher(10)

    # Store two entries, dumping the cache state after each write.
    for name in ("hello", "mother"):
        cacher.cache(name, name + "value")
        cacher.printself()

    # Read back both stored keys and three keys that were never stored,
    # dumping the cache state (including counters) after each read.
    for probe in ("hello", "mother", "oadsado", "ndsdi", "madsdsm"):
        print(cacher.get(probe))
        cacher.printself()

运行结果：

[None, None, 'hellovalue', None, None, None, None, None, None, None] misscnt: 0 , hitcnt:  0
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 0 , hitcnt:  0
method `get` ret:  hellovalue
hellovalue
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 0 , hitcnt:  1
method `get` ret:  mothervalue
mothervalue
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 0 , hitcnt:  2
method `get` ret:  None
None
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 1 , hitcnt:  2
method `get` ret:  None
None
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 2 , hitcnt:  2
method `get` ret:  mothervalue
mothervalue
[None, None, 'hellovalue', None, None, 'mothervalue', None, None, None, None] misscnt: 2 , hitcnt:  3

FIFO

#coding: utf8
__author__ = "郭 璞"
__email__ = "[email protected]"
# FIFO cacher

class Cacher(object):
    """Fixed-capacity FIFO cache: the oldest cached value is evicted first."""

    def __init__(self, capacity):
        """Create an empty cache that keeps at most ``capacity`` values."""
        # Maximum number of values kept at the same time.
        self.capacity = capacity
        # Values in insertion order; index 0 is the oldest.  The list starts
        # empty so that get() never hands back a placeholder instead of a
        # value that was actually cached.
        self.container = []

    def cache(self, value):
        """Store ``value``, evicting the oldest value(s) when the cache is full.

        With a capacity of zero or less nothing can be kept, so the call is a
        no-op.
        """
        if self.capacity <= 0:
            return
        # A loop (not a single pop) also copes with ``capacity`` having been
        # lowered after values were stored.
        while len(self.container) >= self.capacity:
            self.container.pop(0)
        self.container.append(value)

    def get(self):
        """Remove and return the oldest value, or ``None`` if the cache is empty."""
        if not self.container:
            return None
        return self.container.pop(0)

    def printself(self):
        """Print the cached values from oldest to newest."""
        print(self.container)


if __name__ == "__main__":
    cacher = Cacher(3)
    # Cache one value more than the capacity, then show what is left.
    for item in range(1, 5):
        cacher.cache(item)
    cacher.printself()
    # One more insertion, then show the contents again.
    cacher.cache(5)
    cacher.printself()

运行结果：

[2, 3, 4]
[3, 4, 5]

LRU

#coding: utf8
__author__ = "郭 璞"
__email__ = "[email protected]"
# LRU Cacher
# 最新使用到的放到最前面
class Node(object):
    """Singly linked list node holding one cached value."""

    def __init__(self, data, next):
        self.data = data
        self.next = next


class Chain(object):
    """Bounded singly linked list used as an LRU cache.

    The head (``root``) holds the most recently used value and the tail the
    least recently used one.  ``size`` always equals the number of linked
    nodes; for a positive ``MAX_SIZE`` the chain never grows beyond it.
    All mutating methods return ``self`` so calls can be chained.
    """

    def __init__(self, size):
        self.root = None
        # Number of nodes currently linked from root.
        self.size = 0
        # Capacity; reaching it makes prepend() evict the tail.
        self.MAX_SIZE = size

    def printself(self):
        """Print the values from head to tail as a list (``[]`` when empty)."""
        data = []
        cursor = self.root
        while cursor is not None:
            data.append(cursor.data)
            cursor = cursor.next
        print(data)

    def prepend(self, data):
        """Insert ``data`` at the head, evicting the tail first when full."""
        if self.size >= self.MAX_SIZE:
            print("数量已达上限, 所以会删去尾部的数据")
            self.removetail()
        # With an empty chain root is None, so the new node becomes the
        # only node without needing a separate branch.
        self.root = Node(data=data, next=self.root)
        self.size += 1
        return self

    def removetail(self):
        """Unlink the last node, if any, and keep ``size`` in sync."""
        if self.root is None:
            return self
        if self.root.next is None:
            # Single node: the head is also the tail.
            self.root = None
        else:
            pre = self.root
            # Stop on the node just before the tail.
            while pre.next.next is not None:
                pre = pre.next
            pre.next = None
        self.size -= 1
        return self

    def remove(self, index):
        """Unlink the node at zero-based ``index``.

        An out-of-range index (negative or ``>= size``) leaves the chain
        unchanged.  Always returns ``self``.
        """
        if self.root is None or index < 0 or index >= self.size:
            return self
        if index == 0:
            self.root = self.root.next
        else:
            pre = self.root
            # Walk to the node preceding the one to drop; the bounds check
            # above guarantees it and its successor exist.
            for _ in range(index - 1):
                pre = pre.next
            pre.next = pre.next.next
        self.size -= 1
        return self

    def apppend(self, data):
        """Add ``data`` at the tail; a full chain is left unchanged."""
        if self.size >= self.MAX_SIZE:
            print("数量已达上限")
            return self
        tmp = Node(data=data, next=None)
        if self.root is None:
            self.root = tmp
        else:
            cursor = self.root
            while cursor.next is not None:
                cursor = cursor.next
            cursor.next = tmp
        self.size += 1
        return self

    # Correctly spelled alias; the original misspelled name is kept so that
    # existing callers continue to work.
    append = apppend

    def index(self, data):
        """Locate ``data`` and return ``(position, hit)``.

        ``hit`` is 1 and ``position`` the zero-based offset from the head when
        a node with an equal value exists; otherwise ``hit`` is 0 and
        ``position`` equals the number of nodes visited.
        """
        counter = 0
        cursor = self.root
        while cursor is not None:
            if cursor.data == data:
                return (counter, 1)
            cursor = cursor.next
            counter += 1
        return (counter, 0)

    def cache(self, data):
        """Record a use of ``data`` by moving or inserting it at the head."""
        index, hit = self.index(data)
        if hit:
            # Drop the old position first so the value is not duplicated
            # and re-inserting it does not trigger an eviction.
            self.remove(index)
        return self.prepend(data)





if __name__ == "__main__":
    chain = Chain(5)
    # Cache six distinct values into a chain created with size 5.
    for item in range(1, 7):
        chain.cache(item)
    chain.printself()
    # Touch two values that were cached earlier, then show the order again.
    for item in (3, 2):
        chain.cache(item)
    chain.printself()

运行结果：

数量已达上限, 所以会删去尾部的数据
[6, 5, 4, 3, 2]
数量已达上限, 所以会删去尾部的数据
数量已达上限, 所以会删去尾部的数据
[2, 3, 6, 5]

整理

经过上面几个小例子，不难看出有如下几个特点：

随机算法有一定的误伤，编码简单，小容量下效率还行。
FIFO误伤率低，但是不符合**“程序访问的局部性原理”**，热点数据不一定能留的下来。
LRU 相对而言还算靠谱，也符合缓存的生存周期。

换出策略

随机换出

FIFO

LRU

整理

猜你喜欢