RocksDB clock cache

通过clock算法实现,每个shard维护一个循环链表,clock handle循环遍历这个循环链表,寻找没有pinned的entry,如果这个entry上次scan之后访问过,则再给一次机会,清除标记移向下一位。若第二圈依旧一个可踢出的都没找到,就返回false
ClockCache implements the CLOCK algorithm. Each shard of clock cache maintains a circular list of cache entries. A clock handle runs over the circular list looking for unpinned entries to evict, but also giving each entry a second chance to stay in cache if it has been used since last scan. A tbb::concurrent_hash_map is used for lookup.

源码:【cache/clock_cache.cc】(头部注释大量相关介绍)

  // Flags and counters associated with the cache handle:
  //   lowest bit: in-cache bit  (is the entry currently in the cache?)
  //   second lowest bit: usage bit (CLOCK "second chance" mark)
  //   the rest bits: reference count
  // The handle is unused when flags equals 0. The thread that decreases the
  // count to 0 is responsible for putting the handle back to recycle_ and
  // cleaning up memory.
std::atomic<uint32_t> flags;  // packed state word of a clock-cache handle
// Bit layout constants: in-cache mask, usage mask, and the offset/unit used
// to read and bump the reference count stored in the remaining bits.
static const uint32_t kInCacheBit = 1; static const uint32_t kUsageBit = 2; static const uint32_t kRefsOffset = 2; static const uint32_t kOneRef = 1 << kRefsOffset;

每次访问某一key:

// Look up `key` in the shard's hash table and pin the matching entry.
// Returns nullptr when the key is absent, or when a concurrent evict/erase
// recycled the handle before a reference could be taken.
Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
  CacheHandle* entry = nullptr;
  {
    HashTable::const_accessor accessor;
    if (!table_.find(accessor, CacheKey(key, hash))) {
      return nullptr;
    }
    entry = accessor->second;
    // The accessor is released at end of scope; from here on the handle can
    // be recycled by other threads, which is why Ref() may fail and the key
    // is re-checked below.
  }
  if (!Ref(reinterpret_cast<Cache::Handle*>(entry))) {
    // Another thread evicted/erased the entry before we could pin it.
    return nullptr;
  }
  // The handle may now represent a different key if other threads snuck in,
  // evicted/erased the entry, and reused the handle for another cache entry.
  if (hash != entry->hash || key != entry->key) {
    CleanupContext context;
    Unref(entry, false, &context);
    // Unref() may have marked the entry for deletion, so finish the cleanup.
    Cleanup(context);
    return nullptr;
  }
  return reinterpret_cast<Cache::Handle*>(entry);
}

// Atomically take one reference on `h`. Returns false if the entry is no
// longer in the cache (an eviction/erase raced with us); the entry is never
// resurrected.
bool ClockCacheShard::Ref(Cache::Handle* h) {
  auto* entry = reinterpret_cast<CacheHandle*>(h);
  uint32_t current = entry->flags.load(std::memory_order_relaxed);
  // CAS loop to increase the reference count.
  for (;;) {
    if (!InCache(current)) {
      // In-cache bit cleared: the entry was evicted or erased.
      return false;
    }
    // Acquire semantics on success: later operations on the cache entry must
    // be ordered after the reference count is increased. Relaxed on failure,
    // since we simply retry with the refreshed value in `current`.
    if (entry->flags.compare_exchange_weak(current, current + kOneRef,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed)) {
      if (CountRefs(current) == 0) {
        // This is the first reference: the entry's charge becomes pinned.
        pinned_usage_.fetch_add(entry->charge, std::memory_order_relaxed);
      }
      return true;
    }
  }
}
// Drop one reference from `handle`. If `set_usage`, first set the CLOCK usage
// bit — the analogue of moving an entry to the LRU head on release. When the
// last reference is dropped and the entry is no longer in cache, the handle
// is recycled under mutex_. Returns true iff `context` now holds values that
// the caller must finish deleting via Cleanup().
bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage,
                            CleanupContext* context) {
  if (set_usage) {
    // Mark "recently used" so the clock hand gives this entry a second chance.
    handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed);
  }
  // Use acquire-release semantics as previous operations on the cache entry
  // has to be order before reference count is decreased, and potential cleanup
  // of the entry has to be order after.
  uint32_t flags = handle->flags.fetch_sub(kOneRef, std::memory_order_acq_rel);  // fetch_sub returns the PRE-decrement value
  assert(CountRefs(flags) > 0);
  if (CountRefs(flags) == 1) {
    // this is the last reference.
    pinned_usage_.fetch_sub(handle->charge, std::memory_order_relaxed);
    // Cleanup if it is the last reference.
    if (!InCache(flags)) {
      MutexLock l(&mutex_);
      RecycleHandle(handle, context);
    }
  }
  // NOTE(review): size_t implicitly converted to bool — true iff cleanup is
  // pending in `context`.
  return context->to_delete_value.size();
}

插入时:

插入时先执行 EvictFromCache 函数,不断剔除条目,直到为指定大小的新 entry 腾出足够空间。Scan through the circular list, evict entries until we get enough capacity for the new cache entry of the specified size. Return true on success, false otherwise. Has to hold mutex_ before being called.

// Scan the circular list starting at head_, evicting unpinned entries until
// usage + charge fits within capacity. Each entry gets the CLOCK "second
// chance" (TryEvict clears its usage bit on the first encounter); only after
// a complete second lap without freeing enough space does the scan give up.
// Has to hold mutex_ before being called. Returns true on success.
bool ClockCacheShard::EvictFromCache(size_t charge, CleanupContext* context) {
  size_t usage = usage_.load(std::memory_order_relaxed);
  size_t capacity = capacity_.load(std::memory_order_relaxed);
  if (usage == 0) {
    return charge <= capacity;
  }
  size_t new_head = head_;  // head_: next handle in the circular list to be examined for eviction
  bool second_iteration = false;
  while (usage + charge > capacity) {
    assert(new_head < list_.size());
    if (TryEvict(&list_[new_head], context)) {  // try to evict the entry the clock hand points at
      usage = usage_.load(std::memory_order_relaxed);
    }
    new_head = (new_head + 1 >= list_.size()) ? 0 : new_head + 1;  // advance the hand, wrapping around
    if (new_head == head_) {
      if (second_iteration) {
        // Finished the second full lap without freeing enough space: fail.
        return false;
      } else {
        second_iteration = true;  // first lap done, start the second
      }
    }
  }
  head_ = new_head;  // enough space freed; next eviction scan resumes here
  return true;
}

bool TryEvict(CacheHandle* value, CleanupContext* context);

检测这个 handle 是否可以被剔除(在 cache 中,usage bit 为 0,且 ref 为 0)。Examine the handle for eviction. If the handle is in cache, the usage bit is not set, and the reference count is 0, evict it from cache. 否则将 usage bit 重置为 0。Otherwise unset the usage bit. Has to hold mutex_ before being called.

// Attempt to evict one entry. Succeeds only when the handle's state is
// exactly "in cache, usage bit clear, zero references"; otherwise the usage
// bit is cleared so the entry can be evicted on the clock hand's next lap.
// Has to hold mutex_ before being called.
bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) {
  mutex_.AssertHeld();
  // Expected state: in-cache bit set, usage bit clear, refcount zero.
  uint32_t expected = kInCacheBit;
  const bool evictable = handle->flags.compare_exchange_strong(
      expected, 0, std::memory_order_acquire, std::memory_order_relaxed);
  if (!evictable) {
    // Pinned or recently used: clear the usage bit (revoking the "second
    // chance") and leave the entry alone for this pass.
    handle->flags.fetch_and(~kUsageBit, std::memory_order_relaxed);
    return false;
  }
  // We won the race: remove the entry from the table and recycle the handle.
  bool erased __attribute__((__unused__)) =
      table_.erase(CacheKey(handle->key, handle->hash));
  assert(erased);
  RecycleHandle(handle, context);
  return true;
}




猜你喜欢

转载自blog.csdn.net/yi_1973/article/details/79541617