HBase LruBlockCache源码分析

本章对LruBlockCache源码进行简单的分析和讲解，其中包括如下部分：

构造函数
getBlock
cacheBlock
evict

构造函数说明

LruBlockCache 构造函数

LruBlockCache 构造函数

从构造函数可以看出，整体由以下几个部分组成

LruBlockCache 内存比例

LruBlockCache 内存比例

统计参数

// Statistics holder, labelled with this class's simple name.
this.stats = new CacheStats(this.getClass().getSimpleName());

// The counters below are kept alongside stats for the necessary bookkeeping.
this.count = new AtomicLong(0);
this.elements = new AtomicLong(0);
this.dataBlockElements = new AtomicLong(0);
this.dataBlockSize = new AtomicLong(0);
// size starts at this.overhead rather than at zero.
this.size = new AtomicLong(this.overhead);

存储容器

// Storage container: a concurrent map from BlockCacheKey to LruCachedBlock, created with an
// explicit initial capacity, load factor and concurrency level.
new ConcurrentHashMap<BlockCacheKey, LruCachedBlock>(mapInitialSize, 
		mapLoadFactor, mapConcurrencyLevel);

LruBlockCache EvictionThread （clean/10s | clean/notify）

// Start a dedicated eviction thread only when requested; otherwise the field stays null.
if (!evictionThread) {
	this.evictionThread = null;
} else {
	this.evictionThread = new EvictionThread(this);
	this.evictionThread.start();
}

/**
 * Eviction loop: sleeps on this thread's monitor until notified or until ten seconds
 * pass, then asks the referenced cache to evict. Exits when {@code go} becomes false
 * or the cache reference has been cleared.
 */
@Override
public void run() {
	enteringRun = true;
	while (this.go) {
		synchronized (this) {
			try {
				// Bounded wait: wake up periodically even if nobody notifies us.
				this.wait(1000 * 10/*Don't wait for ever*/);
			} catch (InterruptedException e) {
				LOG.warn("Interrupted eviction thread ", e);
				// NOTE(review): the interrupt flag is restored and the loop keeps running, so
				// later wait() calls presumably throw immediately — confirm this is intended.
				Thread.currentThread().interrupt();
			}
		}
		LruBlockCache target = this.cache.get();
		if (target == null) {
			// The cache is gone; nothing left to evict from.
			break;
		}
		target.evict();
	}
}

定时日志 – print log/5m (default)

// Run a StatisticsThread at a fixed rate: both the initial delay and the period are
// statThreadPeriod, interpreted in seconds.
this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
        statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);

/**
 * Logs a one-line summary of cache size, access, hit and eviction statistics at INFO level.
 *
 * <p>Every field is followed by exactly one {@code ", "} separator. Each ratio is printed as
 * {@code "0"} while its hit counter is still zero, which avoids formatting a ratio that has
 * no hits behind it.
 */
public void logStats() {
    // Log size
    long totalSize = heapSize();
    long freeSize = maxSize - totalSize;
    LruBlockCache.LOG.info("totalSize=" + StringUtils.byteDesc(totalSize) + ", " +
        "freeSize=" + StringUtils.byteDesc(freeSize) + ", " +
        "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
        "blockCount=" + getBlockCount() + ", " +
        "accesses=" + stats.getRequestCount() + ", " +
        "hits=" + stats.getHitCount() + ", " +
        // The separator is appended once, outside the conditional, for both branches.
        "hitRatio=" + (stats.getHitCount() == 0 ?
          "0" : StringUtils.formatPercent(stats.getHitRatio(), 2)) + ", " +
        "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
        "cachingHits=" + stats.getHitCachingCount() + ", " +
        "cachingHitsRatio=" + (stats.getHitCachingCount() == 0 ?
          "0" : StringUtils.formatPercent(stats.getHitCachingRatio(), 2)) + ", " +
        "evictions=" + stats.getEvictionCount() + ", " +
        "evicted=" + stats.getEvictedCount() + ", " +
        "evictedPerRun=" + stats.evictedPerEviction());
}

getBlock分析

/**
 * Looks up a block in this cache, consulting the victim handler when the block is missing.
 *
 * @param cacheKey key of the block to fetch
 * @param caching forwarded to the statistics calls; also decides whether a block found in
 *     the victim handler is cached again here
 * @param repeat when true, a miss is not counted and the victim handler is not consulted
 * @param updateCacheMetrics whether hit/miss statistics should be updated
 * @return the cached block, or null when neither this cache nor the victim handler has it
 */
public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
      boolean updateCacheMetrics) {
	LruCachedBlock cached = map.get(cacheKey);
	if (cached != null) {
		// Hit: update the statistics and stamp the entry with a fresh access counter value.
		if (updateCacheMetrics) {
			stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
		}
		cached.access(count.incrementAndGet());
		return cached.getBuffer();
	}

	// Miss: record it in CacheStats unless this is a repeated lookup.
	if (updateCacheMetrics && !repeat) {
		stats.miss(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
	}

	// The victim handler presumably holds entries that were evicted from this cache —
	// confirm against the eviction path.
	if (repeat || victimHandler == null) {
		return null;
	}

	// Try the victim handler, and put the block back into this cache when caching is wanted.
	Cacheable fromVictim = victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
	if (caching && fromVictim != null) {
		cacheBlock(cacheKey, fromVictim, false, true);
	}
	return fromVictim;
}

cacheBlock分析

/**
 * Inserts a block into the cache unless it is too large, already cached, or the cache is
 * over its hard limit; triggers eviction when the cache has grown past the acceptable size.
 * This excerpt is abridged: statements replaced by "(elided)" comments are not shown.
 *
 * @param cacheKey key under which the block is stored
 * @param buf block contents to cache
 * @param inMemory passed to the new LruCachedBlock entry
 * @param cacheDataInL1 not used by the statements shown in this excerpt
 * @throws RuntimeException if the key is already cached with different contents
 */
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
      final boolean cacheDataInL1) {

	if (buf.heapSize() > maxBlockSize) {
	    // (elided) update stats and log a warning
	    return;
	}

	LruCachedBlock cb = map.get(cacheKey);
	if (cb != null) { // the key is already cached
		// Check whether the cached value equals the buffer being inserted.
	    if (BlockCacheUtil.compareCacheBlock(buf, cb.getBuffer()) != 0) {
	    	// Contents differ: throw.
	        throw new RuntimeException("Cached block contents differ, which should not have happened."
				  + "cacheKey:" + cacheKey);
	    }
	    // (elided) log a warning
	    return;
	}

	long currentSize = size.get();
	long currentAcceptableSize = acceptableSize();
	long hardLimitSize = (long) (hardCapacityLimitFactor * currentAcceptableSize);
	if (currentSize >= hardLimitSize) {
		// At or above the hard limit: reject the insert and count it as a failure.
		stats.failInsert();

		// print trace log
		if (!evictionInProgress) {
			// Too much space is in use: run the necessary cleanup.
			runEviction();
		}
		return;
	}

	cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
	map.put(cacheKey, cb);

	// Update the statistics counters.
	long newSize = updateSizeMetrics(cb, false);
	long val = elements.incrementAndGet();
	if (buf.getBlockType().isData()) {
		dataBlockElements.incrementAndGet();
	}
	
	if (newSize > currentAcceptableSize && !evictionInProgress) {
		runEviction(); // free up space
	}
}

evict分析

该逻辑较为复杂，换种方式来说明一下

触发条件: bytesToFree = currentSize - minSize()，仅当 bytesToFree > 0 时才进行清理;
过程（仅说明正常情况，异常情况不予说明）（伪代码）:
1. [single, multi, memory].map(new BlockBucket(_))
2. cache.entrys.foreach(entry => findBucket(entry.priority).add(entry))  // priority: single | multi | memory
3. queue = new PriorityQueue; queue.add(buckets)
4. queue.foreach(_.free)
5. print log

注意点说明

PriorityQueue: 可以翻译成【优先队列】，即添加至该队列中的对象将会被排序

/**
 * Orders buckets by ascending overflow, so a bucket with a smaller overflow sorts first.
 *
 * @param that bucket to compare against
 * @return negative, zero or positive as this bucket's overflow is less than, equal to or
 *     greater than the other's
 */
public int compareTo(BlockBucket that) {
	long mine = this.overflow();
	long theirs = that.overflow();
	return Long.compare(mine, theirs);
}

BlockBucket构造函数

/**
 * Creates an empty bucket.
 *
 * @param name bucket label (single | multi | memory)
 * @param bytesToFree passed to the candidate queue as its first argument
 * @param blockSize passed to the candidate queue as its second argument
 * @param bucketSize size this bucket is compared against in overflow(); callers are
 *     expected to pass floor(maxSize * [single|multi|memory]Factor * minFactor) —
 *     verify at the call site
 */
public BlockBucket(String name, long bytesToFree, long blockSize, long bucketSize) {
	// Running total of the heap size of every block offered to this bucket.
	this.totalSize = 0;
	// Blocks selected for eviction are kept in this queue.
	this.queue = new LruCachedBlockQueue(bytesToFree, blockSize);
	this.bucketSize = bucketSize;
	this.name = name;
}

/**
 * Offers a block to this bucket.
 *
 * @param block block to account for and hand to the candidate queue
 */
public void add(LruCachedBlock block) {
	// totalSize only ever grows here, whether or not the queue ends up keeping the block.
	long blockBytes = block.heapSize();
	totalSize += blockBytes;
	// The queue applies its own selection when a block is added.
	queue.add(block);
}

/**
 * Returns how far the accumulated size of this bucket exceeds its configured size.
 *
 * @return totalSize minus bucketSize; negative when the bucket is under its size
 */
public long overflow() {
	long excess = totalSize - bucketSize;
	return excess;
}

LruCachedBlockQueue

/** LruCachedBlockQueue */
/** MinMaxPriorityQueue<LruCachedBlock> queue */
// LruCachedBlockQueue的add方法
/**
 * Offers a block to this queue of eviction candidates.
 *
 * <p>While the tracked heapSize is below maxSize the block is always accepted. After that,
 * a block is accepted only when it orders after the current head, and the head is removed
 * only if the queue would still exceed maxSize without it.
 *
 * @param cb block to consider
 */
public void add(LruCachedBlock cb) {
	if(heapSize < maxSize) {
		// Stored size (heapSize) is still below the target (maxSize, which is really
		// bytesToFree, i.e. the amount of space to reclaim): accept unconditionally.
		queue.add(cb);
		heapSize += cb.heapSize();
		
	} else {
		// Stored size (heapSize) has reached the target (maxSize).
		// With LruCachedBlock.compareTo, the head is the entry with the largest accessTime
		// (assuming the queue uses the blocks' natural ordering).
		LruCachedBlock head = queue.peek();
		if(cb.compareTo(head) > 0) { 
			// cb.accessTime < head.accessTime: cb is added to the queue.
			// Tentatively account for cb replacing head.
			heapSize += cb.heapSize();
			heapSize -= head.heapSize();
			if(heapSize > maxSize) {
				// Still above the target without head: drop head.
				queue.poll();
			} else {
				// Dropping head would fall to or below the target: keep head, restore its size.
				heapSize += head.heapSize();
			}
			queue.add(cb);
		}
	}
	// As a result the queue keeps only
	// 1. the entries with the smallest accessTime
	// 2. enough of them that their total size is at least the amount to be freed
}

/** LruCachedBlock */
/**
 * Orders blocks by descending accessTime: a block with a smaller accessTime compares as
 * greater. accessTime presumably holds the value of the cache-wide access counter at the
 * last access (see {@code cb.access(count.incrementAndGet())}), i.e. an access stamp
 * rather than a number of accesses — confirm against LruCachedBlock.access.
 *
 * @param that block to compare against
 * @return 0 when the access times are equal, 1 when this block's is smaller, -1 otherwise
 */
@Override
public int compareTo(LruCachedBlock that) {
	if (this.accessTime == that.accessTime) {
		return 0;
	}
	if (this.accessTime < that.accessTime) {
		return 1;
	}
	return -1;
}

注意点总结

循环将cache.entrys添加到BlockBucket中，最后BlockBucket中只会留下【被选择清理的对象】
添加到PriorityQueue中的BlockBucket，会按照overflow的大小（实际存放量与 min存放量之间的差值）升序排序（PriorityQueue的队首是最小元素），即溢出最少的bucket最先被处理，溢出最多的bucket最后被处理。每个bucket最多只释放自己的溢出量，且不超过“剩余待释放量 / 剩余bucket数”，因此会尽可能的保留更多的block：

	// Free at most this bucket's overflow, and no more than an even share of the bytes
	// still to be freed across the remaining buckets.
	long bucketBytesToFree = Math.min(overflow,
		(bytesToFree - bytesFreed) / remainingBuckets);
	bytesFreed += bucket.free(bucketBytesToFree);

LruBlockCache简易导图

LruBlockCache整理

HBase LruBlockCache源码分析

HBase LruBlockCache源码分析

构造函数说明

getBlock分析

cacheBlock分析

evict分析

注意点说明

注意点总结

LruBlockCache简易导图

猜你喜欢