BlockManagerMasterActor
/**
* 负责维护各个executor的blockManager的元数据:
* blockManagerInfo,BlockStatus
*/
private[spark] class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus: LiveListenerBus)
extends Actor with ActorLogReceive with Logging
...
// Metadata kept by the master for every registered block manager, keyed by BlockManagerId.
private val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]
// Maps an executor id to the BlockManagerId currently registered for that executor.
private val blockManagerIdByExecutor = new mutable.HashMap[String, BlockManagerId]
/**
 * Registers a block manager with the master.
 *
 * When `id` is not yet present in `blockManagerInfo`, any block manager previously recorded for
 * the same executor is removed first (it is assumed dead), and the new id is then stored in both
 * `blockManagerIdByExecutor` and `blockManagerInfo`. A `SparkListenerBlockManagerAdded` event is
 * posted on every call, including calls for an id that was already registered.
 *
 * @param id         identifier of the block manager being registered
 * @param maxMemSize memory size logged as "RAM" and stored in the new `BlockManagerInfo`
 * @param slaveActor actor reference stored in the new `BlockManagerInfo`
 */
private def register(id: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef): Unit = {
  val time = System.currentTimeMillis()
  val alreadyRegistered = blockManagerInfo.contains(id)
  if (!alreadyRegistered) {
    // A block manager of the same executor may already exist; if so remove it (assumed dead).
    blockManagerIdByExecutor.get(id.executorId).foreach { oldId =>
      logError("Got two different block manager registrations on same executor - "
        + s" will replace old one $oldId with new one $id")
      removeExecutor(id.executorId)
    }
    logInfo("Registering block manager %s with %s RAM, %s".format(
      id.hostPort, Utils.bytesToString(maxMemSize), id))
    blockManagerIdByExecutor(id.executorId) = id
    val registeredAt = System.currentTimeMillis()
    blockManagerInfo(id) = new BlockManagerInfo(id, registeredAt, maxMemSize, slaveActor)
  }
  listenerBus.post(SparkListenerBlockManagerAdded(time, id, maxMemSize))
}
/**
 * Applies a block status update sent by a block manager to the master's metadata.
 *
 * Outcomes:
 *  - the block manager is unknown: returns `true` only for a driver outside local mode,
 *    otherwise `false`;
 *  - `blockId` is null: only the block manager's last-seen time is refreshed, returns `true`;
 *  - otherwise the block manager's `BlockManagerInfo` is updated and `blockLocations` is
 *    adjusted: the block manager is added to the block's location set when `storageLevel` is
 *    valid and removed when it is not; an empty location set is dropped from the map.
 *
 * @return `false` only when the update came from an unregistered block manager that is not an
 *         exempt driver; `true` otherwise
 */
private def updateBlockInfo(
    blockManagerId: BlockManagerId,
    blockId: BlockId,
    storageLevel: StorageLevel,
    memSize: Long,
    diskSize: Long,
    tachyonSize: Long): Boolean = {
  blockManagerInfo.get(blockManagerId) match {
    case None =>
      // We intentionally do not register the master (except in local mode),
      // so we should not indicate failure for it; any other unknown sender is a failure.
      blockManagerId.isDriver && !isLocal

    case Some(managerInfo) if blockId == null =>
      managerInfo.updateLastSeenMs()
      true

    case Some(managerInfo) =>
      managerInfo.updateBlockInfo(blockId, storageLevel, memSize, diskSize, tachyonSize)

      // blockLocations holds, per block id, the set of block managers that store the block.
      val holders: mutable.HashSet[BlockManagerId] =
        if (blockLocations.containsKey(blockId)) {
          blockLocations.get(blockId)
        } else {
          val fresh = new mutable.HashSet[BlockManagerId]
          blockLocations.put(blockId, fresh)
          fresh
        }

      if (storageLevel.isValid) {
        holders.add(blockManagerId)
      } else {
        holders.remove(blockManagerId)
      }

      // Remove the block from master tracking if it has been removed on all slaves.
      if (holders.isEmpty) {
        blockLocations.remove(blockId)
      }
      true
  }
}
BlockManager
/**
* blockManager运行在每个节点上,包括driver和executor,都有一份,主要提供了在本地或者远程存储数据的功能
* 支持内存,磁盘、堆外存储
*/
private[spark] class BlockManager(
...
/**
 * Initializes this BlockManager for the application identified by `appId`.
 *
 * In order: initializes the block transfer service and the shuffle client, builds this
 * manager's `blockManagerId` and `shuffleServerId`, registers with the master, and, when the
 * external shuffle service is enabled and this is not the driver, registers with that service.
 */
def initialize(appId: String): Unit = {
// Initialize the blockTransferService with this BlockManager.
blockTransferService.init(this)
shuffleClient.init(appId)
// Build the BlockManagerId for this block manager from the executor id plus the
// transfer service's host name and port.
blockManagerId = BlockManagerId(
executorId, blockTransferService.hostName, blockTransferService.port)
shuffleServerId = if (externalShuffleServiceEnabled) {
BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort)
} else {
blockManagerId
}
// Register this block manager through the master reference
// (BlockManagerMaster.registerBlockManager sends a RegisterBlockManager message).
master.registerBlockManager(blockManagerId, maxMemory, slaveActor)
// Register Executors' configuration with the local shuffle service, if one should exist.
if (externalShuffleServiceEnabled && !blockManagerId.isDriver) {
registerWithExternalShuffleServer()
}
}
=> master.registerBlockManager -> BlockManagerMaster.registerBlockManager
/**
 * Builds a `RegisterBlockManager` message from the arguments and passes it to `tell`,
 * logging before and after.
 */
def registerBlockManager(
    blockManagerId: BlockManagerId,
    maxMemSize: Long,
    slaveActor: ActorRef): Unit = {
  logInfo("Trying to register BlockManager")
  val registration = RegisterBlockManager(blockManagerId, maxMemSize, slaveActor)
  tell(registration)
  logInfo("Registered BlockManager")
}
==> tell(RegisterBlockManager(blockManagerId, maxMemSize, slaveActor)) -> 发送 RegisterBlockManager 到blockManagerMasterActor
BlockManagerMasterActor.receiveWithLogging.(case RegisterBlockManager) -> blockManagerMasterActor收到消息后进行注册register(blockManagerId, maxMemSize, slaveActor)
====> 下面分析 slaveActor
// Actor created for this BlockManager: a BlockManagerSlaveActor wrapping this instance and the
// mapOutputTracker, named "BlockManagerActor" followed by the next value of ID_GENERATOR.
// This reference is what initialize() hands to the master on registration.
private val slaveActor = actorSystem.actorOf(
Props(new BlockManagerSlaveActor(this, mapOutputTracker)),
name = "BlockManagerActor" + BlockManager.ID_GENERATOR.next)
说明 BlockManagerSlaveActor 向 blockManagerMasterActor 注册
从本地拉取数据 BlockManager.doGetLocal
/**
 * Looks a block up in this BlockManager's local stores.
 *
 * The stores are consulted in the order memory, Tachyon (off-heap), disk, each only if the
 * block's storage level uses it. A block read from disk whose level also uses memory is put
 * back into the memory store before being returned.
 *
 * @param blockId       block to read
 * @param asBlockResult when true the result is wrapped in a `BlockResult` (deserialized
 *                      values); when false the serialized bytes are returned
 * @return `Some(...)` with the block, or `None` when the block is not registered locally, was
 *         removed, its write was marked as failed, or no enabled store yielded it
 * @throws BlockException when the level uses disk but the disk store has no bytes for the block
 * @throws SparkException when caching deserialized values in memory does not return an iterator
 */
private def doGetLocal(blockId: BlockId, asBlockResult: Boolean): Option[Any] = {
// Fetch the BlockInfo tracked for this block; null means it is not registered locally.
val info = blockInfo.get(blockId).orNull
if (info != null) {
// The block's BlockInfo object is used as the monitor for the rest of this method
// (removeBlock also synchronizes on it, per the note below).
info.synchronized {
// Double check to make sure the block is still there. There is a small chance that the
// block has been removed by removeBlock (which also synchronizes on the blockInfo object).
// Note that this only checks metadata tracking. If user intentionally deleted the block
// on disk or from off heap storage without using removeBlock, this conditional check will
// still pass but eventually we will get an exception because we can't find the block.
if (blockInfo.get(blockId).isEmpty) {
logWarning(s"Block $blockId had been removed")
return None
}
// If another thread is writing the block, wait for it to become ready.
if (!info.waitForReady()) {
// If we get here, the block write failed.
logWarning(s"Block $blockId was marked as failure.")
return None
}
val level = info.level
logDebug(s"Level for block $blockId is $level")
// Look for the block in memory
// If the storage level uses memory, try the MemoryStore first.
if (level.useMemory) {
logDebug(s"Getting block $blockId from memory")
val result = if (asBlockResult) {
// Values path: see MemoryStore.getValues
memoryStore.getValues(blockId).map(new BlockResult(_, DataReadMethod.Memory, info.size))
} else {
// Bytes path: see MemoryStore.getBytes
memoryStore.getBytes(blockId)
}
result match {
case Some(values) =>
return result
case None =>
// Not in memory: fall through to the other stores below.
logDebug(s"Block $blockId not found in memory")
}
}
// Look for the block in Tachyon
if (level.useOffHeap) {
logDebug(s"Getting block $blockId from tachyon")
if (tachyonStore.contains(blockId)) {
tachyonStore.getBytes(blockId) match {
case Some(bytes) =>
if (!asBlockResult) {
return Some(bytes)
} else {
// Note: Tachyon reads are reported with DataReadMethod.Memory.
return Some(new BlockResult(
dataDeserialize(blockId, bytes), DataReadMethod.Memory, info.size))
}
case None =>
logDebug(s"Block $blockId not found in tachyon")
}
}
}
// Look for block on disk, potentially storing it back in memory if required
if (level.useDisk) {
logDebug(s"Getting block $blockId from disk")
// See DiskStore.getBytes
val bytes: ByteBuffer = diskStore.getBytes(blockId) match {
case Some(b) => b
case None =>
throw new BlockException(
blockId, s"Block $blockId not found on disk, though it should be")
}
assert(0 == bytes.position())
// Case 1: the level uses disk but not memory.
if (!level.useMemory) {
// If the block shouldn't be stored in memory, we can just return it
if (asBlockResult) {
return Some(new BlockResult(dataDeserialize(blockId, bytes), DataReadMethod.Disk,
info.size))
} else {
return Some(bytes)
}
// Case 2: the level uses both disk and memory.
} else {
// Otherwise, we also have to store something in the memory store
if (!level.deserialized || !asBlockResult) {
/* We'll store the bytes in memory if the block's storage level includes
* "memory serialized", or if it should be cached as objects in memory
* but we only requested its serialized bytes. */
val copyForMemory = ByteBuffer.allocate(bytes.limit)
copyForMemory.put(bytes)
// After reading from disk, try to cache a copy of the bytes in the MemoryStore.
memoryStore.putBytes(blockId, copyForMemory, level)
// The copy above consumed `bytes`; reset its position before handing it out.
bytes.rewind()
}
if (!asBlockResult) {
return Some(bytes)
} else {
val values = dataDeserialize(blockId, bytes)
if (level.deserialized) {
// Cache the values before returning them
val putResult = memoryStore.putIterator(
blockId, values, level, returnValues = true, allowPersistToDisk = false)
// The put may or may not have succeeded, depending on whether there was enough
// space to unroll the block. Either way, the put here should return an iterator.
putResult.data match {
case Left(it) =>
return Some(new BlockResult(it, DataReadMethod.Disk, info.size))
case _ =>
// This only happens if we dropped the values back to disk (which is never)
throw new SparkException("Memory store did not return an iterator!")
}
} else {
return Some(new BlockResult(values, DataReadMethod.Disk, info.size))
}
}
}
}
}
} else {
logDebug(s"Block $blockId not registered locally")
}
None
}
=> 从内存中拉取数据 MemoryStore.getBytes
/**
 * Returns the serialized bytes of a block held in the memory store, if present.
 *
 * The entry lookup is done while holding the `entries` lock. An entry stored as deserialized
 * values is serialized through `blockManager.dataSerialize`; an entry stored as bytes is
 * returned as a `duplicate()` of the stored buffer, which does not copy the data.
 *
 * @return `None` when the store has no entry for `blockId`, otherwise `Some(bytes)`
 */
override def getBytes(blockId: BlockId): Option[ByteBuffer] = {
  val cached = entries.synchronized {
    entries.get(blockId)
  }
  Option(cached).map { memoryEntry =>
    if (memoryEntry.deserialized) {
      // Stored as objects: serialize them on demand.
      blockManager.dataSerialize(blockId, memoryEntry.value.asInstanceOf[Array[Any]].iterator)
    } else {
      // Stored as bytes: hand out a view. Doesn't actually copy the data.
      memoryEntry.value.asInstanceOf[ByteBuffer].duplicate()
    }
  }
}
==> MemoryStore.entries
// Map maintained by the memory store: one MemoryEntry per block id, holding that block's
// in-memory data. NOTE(review): the (32, 0.75f, true) constructor is presumably
// java.util.LinkedHashMap's, where the trailing `true` selects access ordering; confirm.
private val entries = new LinkedHashMap[BlockId, MemoryEntry](32, 0.75f, true)
=> 从磁盘中拉取数据 DiskStore.getBytes(底层使用 Java NIO 进行文件的读写操作)
/**
 * Reads `length` bytes of `file`, starting at `offset`, through a read-only file channel.
 *
 * Regions shorter than `minMemoryMapBytes` are read into a buffer from `ByteBuffer.allocate`;
 * anything else is memory-mapped read-only. The channel is closed before returning, whether
 * or not the read succeeded.
 *
 * @throws IOException when end of file is reached before the small-read buffer is filled
 */
private def getBytes(file: File, offset: Long, length: Long): Option[ByteBuffer] = {
  // DiskStore reads files through Java NIO channels.
  val channel = new RandomAccessFile(file, "r").getChannel
  try {
    val useMemoryMap = length >= minMemoryMapBytes
    if (useMemoryMap) {
      Some(channel.map(MapMode.READ_ONLY, offset, length))
    } else {
      // For small files, directly read rather than memory map
      val buf = ByteBuffer.allocate(length.toInt)
      channel.position(offset)
      while (buf.hasRemaining) {
        val bytesRead = channel.read(buf)
        if (bytesRead == -1) {
          throw new IOException("Reached EOF before filling buffer\n" +
            s"offset=$offset\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}")
        }
      }
      buf.flip()
      Some(buf)
    }
  } finally {
    channel.close()
  }
}
从远程拉取数据 BlockManager.doGetRemote
/**
 * Fetches a block from a remote block manager.
 *
 * The locations the master reports for the block are tried in random order. For each one the
 * block is fetched with `blockTransferService.fetchBlockSync`, addressed by the location's
 * host, port and executor id; the first non-null buffer is returned.
 *
 * @param blockId       block to fetch; must not be null
 * @param asBlockResult when true the bytes are deserialized and wrapped in a `BlockResult`
 *                      tagged `DataReadMethod.Network`; when false the raw buffer is returned
 * @return `Some(...)` for the first location that returns data, `None` if none does
 */
private def doGetRemote(blockId: BlockId, asBlockResult: Boolean): Option[Any] = {
  require(blockId != null, "BlockId is null")
  // Ask the BlockManagerMaster which block managers hold this block, then shuffle them.
  val candidates = Random.shuffle(master.getLocations(blockId)).iterator
  while (candidates.hasNext) {
    val loc = candidates.next()
    logDebug(s"Getting remote block $blockId from $loc")
    val data = blockTransferService.fetchBlockSync(
      loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()
    if (data != null) {
      val fetched: Any =
        if (asBlockResult) {
          new BlockResult(dataDeserialize(blockId, data), DataReadMethod.Network, data.limit())
        } else {
          data
        }
      return Some(fetched)
    }
    logDebug(s"The value of block $blockId is null")
  }
  logDebug(s"Block $blockId not found")
  None
}
写入数据 BlockManager.doPut
/**
 * Stores a block locally and, when the storage level asks for it, replicates it.
 *
 * Main steps:
 *  1. the data is written to the store selected from the put level
 *     (memory, then Tachyon, then disk) via putIterator / putArray / putBytes;
 *  2. if the block ended up stored and `tellMaster` is set, `reportBlockStatus` is called;
 *  3. if the put level's replication is greater than 1, `replicate` is called. For byte
 *     buffer input this is started in a Future before the local write and awaited afterwards.
 *
 * @param blockId               block to store; must not be null
 * @param data                  the block content (IteratorValues, ArrayValues or
 *                              ByteBufferValues)
 * @param level                 storage level recorded in the block's BlockInfo; must be valid
 * @param tellMaster            whether to report the new block status
 * @param effectiveStorageLevel if defined, the level actually used for the put instead of
 *                              `level`
 * @return the blocks whose status changed: blocks dropped from memory by this put plus this
 *         block itself if it was stored. Empty if the block already existed and was ready.
 * @throws BlockException when the put level uses neither memory, off-heap nor disk
 * @throws SparkException when replication needs bytes but the put returned neither an
 *                        iterator nor bytes
 */
private def doPut(
blockId: BlockId,
data: BlockValues,
level: StorageLevel,
tellMaster: Boolean = true,
effectiveStorageLevel: Option[StorageLevel] = None)
: Seq[(BlockId, BlockStatus)] = {
require(blockId != null, "BlockId is null")
require(level != null && level.isValid, "StorageLevel is null or invalid")
effectiveStorageLevel.foreach {
level =>
require(level != null && level.isValid, "Effective StorageLevel is null or invalid")
}
// Return value
val updatedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]
/** Remember the block's storage level so that we can correctly drop it to disk if it needs
* to be dropped right after it got put into memory. Note, however, that other threads will
* not be able to get() this block until we call markReady on its BlockInfo.
* A BlockInfo is created for the block being written and placed into the blockInfo map.
*/
val putBlockInfo = {
val tinfo = new BlockInfo(level, tellMaster)
// Do atomically !
val oldBlockOpt = blockInfo.putIfAbsent(blockId, tinfo)
if (oldBlockOpt.isDefined) {
if (oldBlockOpt.get.waitForReady()) {
logWarning(s"Block $blockId already exists on this machine; not re-adding it")
return updatedBlocks
}
// TODO: So the block info exists - but previous attempt to load it (?) failed.
// What do we do now ? Retry on it ?
oldBlockOpt.get
} else {
tinfo
}
}
val startTimeMs = System.currentTimeMillis
/* If we're storing values and we need to replicate the data, we'll want access to the values,
* but because our put will read the whole iterator, there will be no values left. For the
* case where the put serializes data, we'll remember the bytes, above; but for the case where
* it doesn't, such as deserialized storage, let's rely on the put returning an Iterator. */
var valuesAfterPut: Iterator[Any] = null
// Ditto for the bytes after the put
var bytesAfterPut: ByteBuffer = null
// Size of the block in bytes
var size = 0L
// The level we actually use to put the block
val putLevel = effectiveStorageLevel.getOrElse(level)
// If we're storing bytes, then initiate the replication before storing them locally.
// This is faster as data is already serialized and ready to send.
val replicationFuture = data match {
case b: ByteBufferValues if putLevel.replication > 1 =>
// Duplicate doesn't copy the bytes, but just creates a wrapper
val bufferView = b.buffer.duplicate()
Future {
replicate(blockId, bufferView, putLevel) }
case _ => null
}
// Lock on the block's BlockInfo to serialize concurrent access to this block.
putBlockInfo.synchronized {
logTrace("Put for block %s took %s to get into synchronized block"
.format(blockId, Utils.getUsedTimeMs(startTimeMs)))
var marked = false
try {
// returnValues - Whether to return the values put
// blockStore - The type of storage to put these values into
// Pick a BlockStore (MemoryStore, TachyonStore or DiskStore) from the put level.
val (returnValues, blockStore: BlockStore) = {
if (putLevel.useMemory) {
// Put it in memory first, even if it also has useDisk set to true;
// We will drop it to disk later if the memory store can't hold it.
(true, memoryStore)
} else if (putLevel.useOffHeap) {
// Use tachyon for off-heap storage
(false, tachyonStore)
} else if (putLevel.useDisk) {
// Don't get back the bytes from put unless we replicate them
(putLevel.replication > 1, diskStore)
} else {
assert(putLevel == StorageLevel.NONE)
throw new BlockException(
blockId, s"Attempted to put block $blockId without specifying storage level!")
}
}
/**
* Actually put the values
* Dispatch on the kind of data to the matching put method of the chosen store.
*/
val result = data match {
case IteratorValues(iterator) =>
blockStore.putIterator(blockId, iterator, putLevel, returnValues)
case ArrayValues(array) =>
blockStore.putArray(blockId, array, putLevel, returnValues)
case ByteBufferValues(bytes) =>
bytes.rewind()
// Step 1: see MemoryStore.putBytes / DiskStore.putBytes
blockStore.putBytes(blockId, bytes, putLevel)
}
size = result.size
result.data match {
case Left (newIterator) if putLevel.useMemory => valuesAfterPut = newIterator
case Right (newBytes) => bytesAfterPut = newBytes
case _ =>
}
// Keep track of which blocks are dropped from memory
if (putLevel.useMemory) {
result.droppedBlocks.foreach {
updatedBlocks += _ }
}
// Look up the current BlockStatus of the block that was just written.
val putBlockStatus = getCurrentBlockStatus(blockId, putBlockInfo)
if (putBlockStatus.storageLevel != StorageLevel.NONE) {
// Now that the block is in either the memory, tachyon, or disk store,
// let other threads read it, and tell the master about it.
marked = true
putBlockInfo.markReady(size)
if (tellMaster) {
// Step 2: report the block's status so the master's block metadata stays in sync.
reportBlockStatus(blockId, putBlockInfo, putBlockStatus)
}
updatedBlocks += ((blockId, putBlockStatus))
}
} finally {
// If we failed in putting the block to memory/disk, notify other possible readers
// that it has failed, and then remove it from the block info map.
if (!marked) {
// Note that the remove must happen before markFailure otherwise another thread
// could've inserted a new BlockInfo before we remove it.
blockInfo.remove(blockId)
putBlockInfo.markFailure()
logWarning(s"Putting block $blockId failed")
}
}
}
logDebug("Put block %s locally took %s".format(blockId, Utils.getUsedTimeMs(startTimeMs)))
// Either we're storing bytes and we asynchronously started replication, or we're storing
// values and need to serialize and replicate them now:
// Step 3: a replication factor above 1 means the block must also be replicated.
if (putLevel.replication > 1) {
data match {
case ByteBufferValues(bytes) =>
if (replicationFuture != null) {
Await.ready(replicationFuture, Duration.Inf)
}
case _ =>
val remoteStartTime = System.currentTimeMillis
// Serialize the block if not already done
if (bytesAfterPut == null) {
if (valuesAfterPut == null) {
throw new SparkException(
"Underlying put returned neither an Iterator nor bytes! This shouldn't happen.")
}
bytesAfterPut = dataSerialize(blockId, valuesAfterPut)
}
// Step 3: perform the replication.
replicate(blockId, bytesAfterPut, putLevel)
logDebug("Put block %s remotely took %s"
.format(blockId, Utils.getUsedTimeMs(remoteStartTime)))
}
}
BlockManager.dispose(bytesAfterPut)
if (putLevel.replication > 1) {
logDebug("Putting block %s with replication took %s"
.format(blockId, Utils.getUsedTimeMs(startTimeMs)))
} else {
logDebug("Putting block %s without replication took %s"
.format(blockId, Utils.getUsedTimeMs(startTimeMs)))
}
updatedBlocks
}
=> 写入内存 MemoryStore.putBytes -> putIterator -> putArray -> tryToPut
/**
 * Tries to place a block in the memory store, first asking `ensureFreeSpace` to make room.
 *
 * If enough space is available the value is recorded in `entries` and `currentMemory` grows
 * by `size`. Otherwise the value is handed to `blockManager.dropFromMemory`.
 *
 * @param blockId      block being stored
 * @param value        an `Array[Any]` when `deserialized` is true, otherwise a `ByteBuffer`
 * @param size         size used for the free-space check and memory accounting
 * @param deserialized whether `value` holds deserialized objects
 * @return whether the block was stored in memory, plus every block dropped along the way
 */
private def tryToPut(
blockId: BlockId,
value: Any,
size: Long,
deserialized: Boolean): ResultWithDroppedBlocks = {
/* TODO: Its possible to optimize the locking by locking entries only when selecting blocks
* to be dropped. Once the to-be-dropped blocks have been selected, and lock on entries has
* been released, it must be ensured that those to-be-dropped blocks are not double counted
* for freeing up more space for another block that needs to be put. Only then the actually
* dropping of blocks (and writing to disk if necessary) can proceed in parallel. */
var putSuccess = false
val droppedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]
/**
* The free-space check and the insertion happen under accountingLock so that they act as
* one step: otherwise another thread could insert data between the check and the put,
* and this put would then exceed the memory that was just judged sufficient.
*/
accountingLock.synchronized {
/**
* ensureFreeSpace decides whether there is room for the block; when there is not, it
* hands already-stored blocks to dropFromMemory to free space.
* NOTE(review): dropFromMemory presumably writes a block to disk only if its storage
* level allows disk and otherwise discards it; its body is not shown here, so confirm.
*/
val freeSpaceResult = ensureFreeSpace(blockId, size)
val enoughFreeSpace = freeSpaceResult.success
droppedBlocks ++= freeSpaceResult.droppedBlocks
// Only write into memory when ensureFreeSpace reported enough room.
if (enoughFreeSpace) {
// Wrap the data in a MemoryEntry.
val entry = new MemoryEntry(value, size, deserialized)
entries.synchronized {
// Record the entry in the in-memory map and account for its size.
entries.put(blockId, entry)
currentMemory += size
}
val valuesOrBytes = if (deserialized) "values" else "bytes"
logInfo("Block %s stored as %s in memory (estimated size %s, free %s)".format(
blockId, valuesOrBytes, Utils.bytesToString(size), Utils.bytesToString(freeMemory)))
putSuccess = true
} else {
// Tell the block manager that we couldn't put it in memory so that it can drop it to
// disk if the block allows disk storage.
val data = if (deserialized) {
Left(value.asInstanceOf[Array[Any]])
} else {
Right(value.asInstanceOf[ByteBuffer].duplicate())
}
val droppedBlockStatus = blockManager.dropFromMemory(blockId, data)
droppedBlockStatus.foreach {
status => droppedBlocks += ((blockId, status)) }
}
}
ResultWithDroppedBlocks(putSuccess, droppedBlocks)
}
==> ensureFreeSpace
/**
 * Tries to make `space` bytes available in the memory store for `blockIdToAdd`.
 *
 * Fails immediately if `space` exceeds `maxMemory`. If free memory (minus the memory
 * currently used for unrolling) is insufficient, existing entries are selected in the
 * iteration order of `entries`, skipping blocks of the same RDD as `blockIdToAdd`, until
 * enough memory would be freed. If the selection suffices, every selected block is passed to
 * `blockManager.dropFromMemory`; otherwise nothing is dropped and the call fails.
 *
 * @param blockIdToAdd block for which room is needed
 * @param space        number of bytes required
 * @return success flag plus the blocks that were dropped to make room
 */
private def ensureFreeSpace(
blockIdToAdd: BlockId,
space: Long): ResultWithDroppedBlocks = {
logInfo(s"ensureFreeSpace($space) called with curMem=$currentMemory, maxMem=$maxMemory")
val droppedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]
if (space > maxMemory) {
logInfo(s"Will not store $blockIdToAdd as it is larger than our memory limit")
return ResultWithDroppedBlocks(success = false, droppedBlocks)
}
// Take into account the amount of memory currently occupied by unrolling blocks
val actualFreeMemory = freeMemory - currentUnrollMemory
// Not enough free memory to hold the block as things stand.
if (actualFreeMemory < space) {
val rddToAdd = getRddId(blockIdToAdd)
val selectedBlocks = new ArrayBuffer[BlockId]
var selectedMemory = 0L
// This is synchronized to ensure that the set of entries is not changed
// (because of getValue or getBytes) while traversing the iterator, as that
// can lead to exceptions.
entries.synchronized {
val iterator = entries.entrySet().iterator()
// Select entries to evict until enough memory would be freed or the map is exhausted.
while (actualFreeMemory + selectedMemory < space && iterator.hasNext) {
val pair = iterator.next()
val blockId = pair.getKey
// Never select a block that belongs to the same RDD as the block being added.
if (rddToAdd.isEmpty || rddToAdd != getRddId(blockId)) {
selectedBlocks += blockId
selectedMemory += pair.getValue.size
}
}
}
// Check whether evicting the selected blocks would leave room for the new block.
if (actualFreeMemory + selectedMemory >= space) {
logInfo(s"${selectedBlocks.size} blocks selected for dropping")
// Drop each of the selected blocks in turn.
for (blockId <- selectedBlocks) {
val entry = entries.synchronized {
entries.get(blockId) }
// This should never be null as only one thread should be dropping
// blocks and removing entries. However the check is still here for
// future safety.
if (entry != null) {
val data = if (entry.deserialized) {
Left(entry.value.asInstanceOf[Array[Any]])
} else {
Right(entry.value.asInstanceOf[ByteBuffer].duplicate())
}
// NOTE(review): dropFromMemory presumably writes the block to disk only when its
// storage level allows disk and otherwise discards the data; confirm in BlockManager.
val droppedBlockStatus = blockManager.dropFromMemory(blockId, data)
droppedBlockStatus.foreach {
status => droppedBlocks += ((blockId, status)) }
}
}
return ResultWithDroppedBlocks(success = true, droppedBlocks)
} else {
logInfo(s"Will not store $blockIdToAdd as it would require dropping another block " +
"from the same RDD")
return ResultWithDroppedBlocks(success = false, droppedBlocks)
}
}
ResultWithDroppedBlocks(success = true, droppedBlocks)
}
=> 写入磁盘 DiskStore.putBytes
/**
 * Writes a block's bytes to the file that `diskManager` assigns to `blockId`, using a Java NIO
 * file channel.
 *
 * The input buffer's position and limit are left untouched because a duplicate is written.
 * The channel is closed in a `finally`, so a failed write no longer leaks the open file handle.
 *
 * @param blockId block being written
 * @param _bytes  data to write, from the buffer's current position to its limit
 * @param level   storage level of the block (not consulted by this method)
 * @return a `PutResult` carrying the buffer's limit as the size and a duplicate of the
 *         written buffer
 */
override def putBytes(blockId: BlockId, _bytes: ByteBuffer, level: StorageLevel): PutResult = {
  // So that we do not modify the input offsets !
  // duplicate does not copy buffer, so inexpensive
  val bytes = _bytes.duplicate()
  logDebug(s"Attempting to put block $blockId")
  val startTime = System.currentTimeMillis
  val file = diskManager.getFile(blockId)
  val channel = new FileOutputStream(file).getChannel
  try {
    // A single write() may be partial, so keep writing until the buffer is drained.
    while (bytes.remaining > 0) {
      channel.write(bytes)
    }
  } finally {
    // Release the file handle even when a write throws.
    channel.close()
  }
  val finishTime = System.currentTimeMillis
  logDebug("Block %s stored as %s file on disk in %d ms".format(
    file.getName, Utils.bytesToString(bytes.limit), finishTime - startTime))
  PutResult(bytes.limit(), Right(bytes.duplicate()))
}