BlockManager源码分析

原理分析

BlockManagerMasterActor

/**
 * 负责维护各个executor的blockManager的元数据:
 * blockManagerInfo,BlockStatus
 */
private[spark]  class BlockManagerMasterActor(val isLocal: Boolean, conf: SparkConf, listenerBus: LiveListenerBus)
  extends Actor with ActorLogReceive with Logging
...
  // blockManagerMaster要负责维护每个blockManager的blockManagerInfo
  private val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]
  // 每个executor到blockManagerId的映射关系
  private val blockManagerIdByExecutor = new mutable.HashMap[String, BlockManagerId]

  /**
   * Registers a block manager with this master.
   *
   * An id that is already tracked in blockManagerInfo is left untouched. Otherwise any
   * block manager previously recorded for the same executor is removed first, and the new
   * id is then stored in both lookup tables. A SparkListenerBlockManagerAdded event is
   * posted in either case.
   *
   * @param id         identity of the block manager being registered
   * @param maxMemSize size reported by the block manager (logged via Utils.bytesToString)
   * @param slaveActor actor reference stored in the new BlockManagerInfo
   */
  private def register(id: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef) {
    val time = System.currentTimeMillis()
    val alreadyTracked = blockManagerInfo.contains(id)

    // Only ids that are not in the local map yet go through registration.
    if (!alreadyTracked) {
      blockManagerIdByExecutor.get(id.executorId).foreach { oldId =>
        // A block manager of the same executor already exists, so remove it (assumed dead)
        logError("Got two different block manager registrations on same executor - "
            + s" will replace old one $oldId with new one $id")
        // Drop the in-memory bookkeeping that belongs to this executor id.
        removeExecutor(id.executorId)
      }

      logInfo("Registering block manager %s with %s RAM, %s".format(
        id.hostPort, Utils.bytesToString(maxMemSize), id))

      blockManagerIdByExecutor.put(id.executorId, id)
      blockManagerInfo.put(
        id, new BlockManagerInfo(id, System.currentTimeMillis(), maxMemSize, slaveActor))
    }

    listenerBus.post(SparkListenerBlockManagerAdded(time, id, maxMemSize))
  }

  /**
   * Applies a block status update reported by a block manager.
   *
   * Per the original note, a block manager sends this request whenever one of its blocks
   * changes. The update is forwarded to that manager's BlockManagerInfo, and the
   * blockLocations table (block id -> set of block manager ids holding it) is adjusted.
   *
   * @return false only when the reporting block manager is unknown and is not a driver
   *         running in non-local mode; true in every other case
   */
  private def updateBlockInfo(
      blockManagerId: BlockManagerId,
      blockId: BlockId,
      storageLevel: StorageLevel,
      memSize: Long,
      diskSize: Long,
      tachyonSize: Long): Boolean = {
    if (!blockManagerInfo.contains(blockManagerId)) {
      // We intentionally do not register the master (except in local mode),
      // so an update from an unregistered driver must not indicate failure.
      blockManagerId.isDriver && !isLocal
    } else if (blockId == null) {
      // No block to update: only refresh the last-seen timestamp of the block manager.
      blockManagerInfo(blockManagerId).updateLastSeenMs()
      true
    } else {
      blockManagerInfo(blockManagerId).updateBlockInfo(
        blockId, storageLevel, memSize, diskSize, tachyonSize)

      // A block can live on several block managers (per the original note, e.g. with the
      // replicated "_2" storage levels), so each block id maps to a set of manager ids.
      val holders: mutable.HashSet[BlockManagerId] =
        if (blockLocations.containsKey(blockId)) {
          blockLocations.get(blockId)
        } else {
          val fresh = new mutable.HashSet[BlockManagerId]
          blockLocations.put(blockId, fresh)
          fresh
        }

      // A valid storage level means the manager holds the block; otherwise it no longer does.
      if (storageLevel.isValid) {
        holders.add(blockManagerId)
      } else {
        holders.remove(blockManagerId)
      }

      // Remove the block from master tracking if it has been removed on all slaves.
      if (holders.isEmpty) {
        blockLocations.remove(blockId)
      }
      true
    }
  }

BlockManager

/**
 * blockManager运行在每个节点上,包括driver和executor,都有一份,主要提供了在本地或者远程存储数据的功能
 * 支持内存,磁盘、堆外存储
 */
private[spark] class BlockManager(
...
// 初始化
  /**
   * Initializes this BlockManager for the given application.
   *
   * In order, this initializes the block transfer service and the shuffle client, builds
   * this manager's BlockManagerId, chooses the shuffle server id, registers with the master
   * and, when the external shuffle service is enabled and this is not the driver, registers
   * with the external shuffle server.
   *
   * @param appId application id passed to shuffleClient.init
   */
  def initialize(appId: String): Unit = {
    
    
    // Initialize the blockTransferService (per the original note, the service used to
    // transfer block data between processes) and the shuffle client.
    blockTransferService.init(this)
    shuffleClient.init(appId)

    // Build the BlockManagerId of this block manager from the executor id and the transfer
    // service's host name and port (per the original note, this identifies the manager
    // uniquely through the executor on a node).
    blockManagerId = BlockManagerId(
      executorId, blockTransferService.hostName, blockTransferService.port)

    shuffleServerId = if (externalShuffleServiceEnabled) {
    
    
      BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort)
    } else {
    
    
      blockManagerId
    }

    // Register this block manager through the master reference
    // (per the original note, this sends a message to the BlockManagerMasterActor).
    master.registerBlockManager(blockManagerId, maxMemory, slaveActor)

    // Register Executors' configuration with the local shuffle service, if one should exist.
    if (externalShuffleServiceEnabled && !blockManagerId.isDriver) {
    
    
      registerWithExternalShuffleServer()
    }
  }
=> master.registerBlockManager -> BlockManagerMaster.registerBlockManager
  /**
   * Sends a RegisterBlockManager message for the given block manager through `tell`,
   * logging before and after the call.
   *
   * NOTE(review): whether `tell` waits for the master's reply is not visible here; the
   * "Registered" log line presumably relies on it doing so — confirm.
   */
  def registerBlockManager(blockManagerId: BlockManagerId, maxMemSize: Long, slaveActor: ActorRef) {
    
    
    logInfo("Trying to register BlockManager")
    tell(RegisterBlockManager(blockManagerId, maxMemSize, slaveActor))
    logInfo("Registered BlockManager")
  }
==>  tell(RegisterBlockManager(blockManagerId, maxMemSize, slaveActor))  ->  发送 RegisterBlockManager 到blockManagerMasterActor
BlockManagerMasterActor.receiveWithLogging 中的 case RegisterBlockManager 分支 -> blockManagerMasterActor 收到消息后,调用 register(blockManagerId, maxMemSize, slaveActor) 进行注册
====> 下面分析 slaveActor
  // Actor created for this BlockManager: a BlockManagerSlaveActor wrapping this manager and
  // the mapOutputTracker, named "BlockManagerActor" plus the next generated id.
  private val slaveActor = actorSystem.actorOf(
    Props(new BlockManagerSlaveActor(this, mapOutputTracker)),
    name = "BlockManagerActor" + BlockManager.ID_GENERATOR.next) 
 说明:BlockManager 注册时,会把自己的 BlockManagerSlaveActor 的引用(slaveActor)随 RegisterBlockManager 消息一起发送给 blockManagerMasterActor

从本地拉取数据 BlockManager.doGetLocal

  /**
   * Looks up a block in the local stores.
   *
   * The block's BlockInfo is used as a monitor; while holding it, the stores are consulted
   * in the order memory, Tachyon (off-heap), disk, as allowed by the block's storage level.
   *
   * @param blockId       block to read
   * @param asBlockResult when true the data is returned wrapped in a BlockResult, otherwise
   *                      the serialized bytes are returned
   * @return the block data, or None if the block is not registered locally, has been
   *         removed, was marked as failed, or was not found in any store that was checked
   * @throws BlockException if the level uses disk but the disk store has no bytes for the block
   */
  private def doGetLocal(blockId: BlockId, asBlockResult: Boolean): Option[Any] = {
    
    
    // Look up the BlockInfo of this block; it is used as the lock object below.
    val info = blockInfo.get(blockId).orNull
    if (info != null) {
    
    
      // The BlockInfo object serves as the per-block monitor: concurrent accesses to the
      // same block are serialized by synchronizing on it.
      info.synchronized {
    
    
        // Double check to make sure the block is still there. There is a small chance that the
        // block has been removed by removeBlock (which also synchronizes on the blockInfo object).
        // Note that this only checks metadata tracking. If user intentionally deleted the block
        // on disk or from off heap storage without using removeBlock, this conditional check will
        // still pass but eventually we will get an exception because we can't find the block.
        if (blockInfo.get(blockId).isEmpty) {
    
    
          logWarning(s"Block $blockId had been removed")
          return None
        }

        // If another thread is writing the block, wait for it to become ready.
        if (!info.waitForReady()) {
    
    
          // If we get here, the block write failed.
          logWarning(s"Block $blockId was marked as failure.")
          return None
        }

        val level = info.level
        logDebug(s"Level for block $blockId is $level")

        // Look for the block in memory
        // If the storage level uses memory (e.g. MEMORY_ONLY), try the MemoryStore first.
        if (level.useMemory) {
    
    
          logDebug(s"Getting block $blockId from memory")
          val result = if (asBlockResult) {
    
    
            // TODO memoryStore.getValues
            memoryStore.getValues(blockId).map(new BlockResult(_, DataReadMethod.Memory, info.size))
          } else {
    
    
            // TODO memoryStore.getBytes
            memoryStore.getBytes(blockId)
          }
          result match {
    
    
            case Some(values) =>
              return result
            case None =>
              logDebug(s"Block $blockId not found in memory")
          }
        }

        // Look for the block in Tachyon
        if (level.useOffHeap) {
    
    
          logDebug(s"Getting block $blockId from tachyon")
          if (tachyonStore.contains(blockId)) {
    
    
            tachyonStore.getBytes(blockId) match {
    
    
              case Some(bytes) =>
                if (!asBlockResult) {
    
    
                  return Some(bytes)
                } else {
    
    
                  return Some(new BlockResult(
                    dataDeserialize(blockId, bytes), DataReadMethod.Memory, info.size))
                }
              case None =>
                logDebug(s"Block $blockId not found in tachyon")
            }
          }
        }

        // Look for block on disk, potentially storing it back in memory if required
        if (level.useDisk) {
    
    
          logDebug(s"Getting block $blockId from disk")
          // TODO diskStore.getBytes
          val bytes: ByteBuffer = diskStore.getBytes(blockId) match {
    
    
            case Some(b) => b
            case None =>
              throw new BlockException(
                blockId, s"Block $blockId not found on disk, though it should be")
          }
          assert(0 == bytes.position())

          // The level uses disk but not memory.
          if (!level.useMemory) {
    
    
            // If the block shouldn't be stored in memory, we can just return it
            if (asBlockResult) {
    
    
              return Some(new BlockResult(dataDeserialize(blockId, bytes), DataReadMethod.Disk,
                info.size))
            } else {
    
    
              return Some(bytes)
            }

          // The level uses disk and memory.
          } else {
    
    
            // Otherwise, we also have to store something in the memory store
            if (!level.deserialized || !asBlockResult) {
    
    
              /* We'll store the bytes in memory if the block's storage level includes
               * "memory serialized", or if it should be cached as objects in memory
               * but we only requested its serialized bytes. */
              val copyForMemory = ByteBuffer.allocate(bytes.limit)
              copyForMemory.put(bytes)
              // After reading from disk, hand a copy of the bytes to the MemoryStore so the
              // block is cached in memory.
              memoryStore.putBytes(blockId, copyForMemory, level)
              bytes.rewind()
            }
            if (!asBlockResult) {
    
    
              return Some(bytes)
            } else {
    
    
              val values = dataDeserialize(blockId, bytes)
              if (level.deserialized) {
    
    
                // Cache the values before returning them
                val putResult = memoryStore.putIterator(
                  blockId, values, level, returnValues = true, allowPersistToDisk = false)
                // The put may or may not have succeeded, depending on whether there was enough
                // space to unroll the block. Either way, the put here should return an iterator.
                putResult.data match {
    
    
                  case Left(it) =>
                    return Some(new BlockResult(it, DataReadMethod.Disk, info.size))
                  case _ =>
                    // This only happens if we dropped the values back to disk (which is never)
                    throw new SparkException("Memory store did not return an iterator!")
                }
              } else {
    
    
                return Some(new BlockResult(values, DataReadMethod.Disk, info.size))
              }
            }
          }
        }
      }
    } else {
    
    
      logDebug(s"Block $blockId not registered locally")
    }
    None
  }

=> 从内存中拉取数据 MemoryStore.getBytes
  /**
   * Returns the serialized bytes of a block held in memory, if any.
   *
   * An entry stored as deserialized values is serialized through the block manager; an
   * entry stored as bytes is returned as a duplicate view of the stored buffer.
   *
   * @param blockId block to look up in `entries`
   * @return None when `entries` has no entry for the block
   */
  override def getBytes(blockId: BlockId): Option[ByteBuffer] = {
    // The lookup itself is done under the entries lock.
    val stored = entries.synchronized { entries.get(blockId) }

    Option(stored).map { found =>
      if (found.deserialized) {
        // Values are kept as objects: serialize them before handing them out.
        blockManager.dataSerialize(blockId, found.value.asInstanceOf[Array[Any]].iterator)
      } else {
        // Already bytes: duplicate() doesn't actually copy the data.
        found.value.asInstanceOf[ByteBuffer].duplicate()
      }
    }
  }
==>   MemoryStore.entries
// Map maintained by the MemoryStore: one MemoryEntry per BlockId, representing that
// block's data in memory. NOTE(review): if this is java.util.LinkedHashMap, the third
// constructor argument (true) selects access ordering — confirm against the imports.
  private val entries = new LinkedHashMap[BlockId, MemoryEntry](32, 0.75f, true)

=> 从磁盘中拉取数据 DiskStore.getBytes:底层使用 Java NIO 进行文件的读写操作
  /**
   * Reads `length` bytes starting at `offset` from `file` through a file channel.
   *
   * Regions shorter than minMemoryMapBytes are read into a heap buffer; larger regions are
   * memory-mapped read-only. The channel is closed before returning.
   *
   * @throws IOException if end of file is reached before the small-read buffer is full
   */
  private def getBytes(file: File, offset: Long, length: Long): Option[ByteBuffer] = {
    val channel = new RandomAccessFile(file, "r").getChannel

    try {
      val contents: ByteBuffer =
        if (length < minMemoryMapBytes) {
          // For small files, directly read rather than memory map
          val buf = ByteBuffer.allocate(length.toInt)
          channel.position(offset)
          while (buf.hasRemaining()) {
            val readCount = channel.read(buf)
            if (readCount == -1) {
              throw new IOException("Reached EOF before filling buffer\n" +
                s"offset=$offset\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}")
            }
          }
          buf.flip()
          buf
        } else {
          channel.map(MapMode.READ_ONLY, offset, length)
        }
      Some(contents)
    } finally {
      channel.close()
    }
  }

从远程拉取数据 BlockManager.doGetRemote

  /**
   * Fetches a block from another block manager.
   *
   * The locations reported by the master are tried in random order, one after another,
   * until a fetch yields a non-null buffer.
   *
   * @param blockId       block to fetch; must not be null
   * @param asBlockResult when true the fetched bytes are deserialized and wrapped in a
   *                      BlockResult, otherwise the fetched buffer itself is returned
   * @return the fetched data, or None when no location returned data
   */
  private def doGetRemote(blockId: BlockId, asBlockResult: Boolean): Option[Any] = {
    require(blockId != null, "BlockId is null")

    // Ask the master which block managers hold this block, then shuffle the answer.
    val candidates = Random.shuffle(master.getLocations(blockId))

    // Iterator.map is lazy, so a location is only contacted when the previous ones gave
    // nothing back; find stops at the first non-null buffer.
    val firstHit = candidates.iterator.map { loc =>
      logDebug(s"Getting remote block $blockId from $loc")

      // fetchBlockSync is addressed with the remote manager's host, port and executor id.
      val fetched = blockTransferService.fetchBlockSync(
        loc.host, loc.port, loc.executorId, blockId.toString).nioByteBuffer()

      if (fetched == null) {
        logDebug(s"The value of block $blockId is null")
      }
      fetched
    }.find(_ != null)

    firstHit match {
      case Some(data) if asBlockResult =>
        Some(new BlockResult(
          dataDeserialize(blockId, data),
          DataReadMethod.Network,
          data.limit()))
      case Some(data) =>
        Some(data)
      case None =>
        logDebug(s"Block $blockId not found")
        None
    }
  }

写入数据 BlockManager.doPut

  /**
   * Stores a block locally and, if requested by the storage level, replicates it.
   *
   * 1. The data is put into the store selected from the storage level (memory, Tachyon or
   *    disk).
   * 2. If the put succeeded and tellMaster is set, the block status is reported through
   *    reportBlockStatus.
   * 3. If the level's replication is greater than 1, the block is replicated: for
   *    ByteBufferValues the replication is started in a Future before the local put and
   *    awaited afterwards; for other data it is done after the local put.
   *
   * @return the blocks whose status changed as a result of this put
   */
  private def doPut(
      blockId: BlockId,
      data: BlockValues,
      level: StorageLevel,
      tellMaster: Boolean = true,
      effectiveStorageLevel: Option[StorageLevel] = None)
    : Seq[(BlockId, BlockStatus)] = {
    
    

    require(blockId != null, "BlockId is null")
    require(level != null && level.isValid, "StorageLevel is null or invalid")
    effectiveStorageLevel.foreach {
    
     level =>
      require(level != null && level.isValid, "Effective StorageLevel is null or invalid")
    }

    // Return value
    val updatedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]

    /** Remember the block's storage level so that we can correctly drop it to disk if it needs
     * to be dropped right after it got put into memory. Note, however, that other threads will
     * not be able to get() this block until we call markReady on its BlockInfo.
     * A BlockInfo is created for the block being written and put into the blockInfo map.
     */
    val putBlockInfo = {
    
    
      val tinfo = new BlockInfo(level, tellMaster)
      // Do atomically !
      val oldBlockOpt = blockInfo.putIfAbsent(blockId, tinfo)
      if (oldBlockOpt.isDefined) {
    
    
        if (oldBlockOpt.get.waitForReady()) {
    
    
          logWarning(s"Block $blockId already exists on this machine; not re-adding it")
          return updatedBlocks
        }
        // TODO: So the block info exists - but previous attempt to load it (?) failed.
        // What do we do now ? Retry on it ?
        oldBlockOpt.get
      } else {
    
    
        tinfo
      }
    }

    val startTimeMs = System.currentTimeMillis

    /* If we're storing values and we need to replicate the data, we'll want access to the values,
     * but because our put will read the whole iterator, there will be no values left. For the
     * case where the put serializes data, we'll remember the bytes, above; but for the case where
     * it doesn't, such as deserialized storage, let's rely on the put returning an Iterator. */
    var valuesAfterPut: Iterator[Any] = null

    // Ditto for the bytes after the put
    var bytesAfterPut: ByteBuffer = null

    // Size of the block in bytes
    var size = 0L

    // The level we actually use to put the block
    val putLevel = effectiveStorageLevel.getOrElse(level)

    // If we're storing bytes, then initiate the replication before storing them locally.
    // This is faster as data is already serialized and ready to send.
    val replicationFuture = data match {
    
    
      case b: ByteBufferValues if putLevel.replication > 1 =>
        // Duplicate doesn't copy the bytes, but just creates a wrapper
        val bufferView = b.buffer.duplicate()
        Future {
    
     replicate(blockId, bufferView, putLevel) }
      case _ => null
    }

    // Lock on the BlockInfo so that concurrent accesses to this block are synchronized.
    putBlockInfo.synchronized {
    
    
      logTrace("Put for block %s took %s to get into synchronized block"
        .format(blockId, Utils.getUsedTimeMs(startTimeMs)))

      var marked = false
      try {
    
    
        // returnValues - Whether to return the values put
        // blockStore - The type of storage to put these values into
        // First pick a BlockStore (memoryStore, tachyonStore or diskStore) from the level.
        val (returnValues, blockStore: BlockStore) = {
    
    
          if (putLevel.useMemory) {
    
    
            // Put it in memory first, even if it also has useDisk set to true;
            // We will drop it to disk later if the memory store can't hold it.
            (true, memoryStore)
          } else if (putLevel.useOffHeap) {
    
    
            // Use tachyon for off-heap storage
            (false, tachyonStore)
          } else if (putLevel.useDisk) {
    
    
            // Don't get back the bytes from put unless we replicate them
            (putLevel.replication > 1, diskStore)
          } else {
    
    
            assert(putLevel == StorageLevel.NONE)
            throw new BlockException(
              blockId, s"Attempted to put block $blockId without specifying storage level!")
          }
        }

        /**
         * Actually put the values
         * The data is handed to the selected store through the put method that matches
         * the kind of data (iterator, array or byte buffer).
         */
        val result = data match {
    
    
          case IteratorValues(iterator) =>
            blockStore.putIterator(blockId, iterator, putLevel, returnValues)
          case ArrayValues(array) =>
            blockStore.putArray(blockId, array, putLevel, returnValues)
          case ByteBufferValues(bytes) =>
            bytes.rewind()
            //  TODO  1) MemoryStore.putBytes | DiskStore.putBytes
            blockStore.putBytes(blockId, bytes, putLevel)
        }
        size = result.size
        result.data match {
    
    
          case Left (newIterator) if putLevel.useMemory => valuesAfterPut = newIterator
          case Right (newBytes) => bytesAfterPut = newBytes
          case _ =>
        }

        // Keep track of which blocks are dropped from memory
        if (putLevel.useMemory) {
    
    
          result.droppedBlocks.foreach {
    
     updatedBlocks += _ }
        }

        // Fetch the current BlockStatus of this block.
        val putBlockStatus = getCurrentBlockStatus(blockId, putBlockInfo)
        if (putBlockStatus.storageLevel != StorageLevel.NONE) {
    
    
          // Now that the block is in either the memory, tachyon, or disk store,
          // let other threads read it, and tell the master about it.
          marked = true
          putBlockInfo.markReady(size)
          if (tellMaster) {
    
    
            // TODO 2) report the status of the written block (per the original note, to the
            // BlockManagerMasterActor, so that block metadata is kept in sync there)
            reportBlockStatus(blockId, putBlockInfo, putBlockStatus)
          }
          updatedBlocks += ((blockId, putBlockStatus))
        }
      } finally {
    
    
        // If we failed in putting the block to memory/disk, notify other possible readers
        // that it has failed, and then remove it from the block info map.
        if (!marked) {
    
    
          // Note that the remove must happen before markFailure otherwise another thread
          // could've inserted a new BlockInfo before we remove it.
          blockInfo.remove(blockId)
          putBlockInfo.markFailure()
          logWarning(s"Putting block $blockId failed")
        }
      }
    }
    logDebug("Put block %s locally took %s".format(blockId, Utils.getUsedTimeMs(startTimeMs)))

    // Either we're storing bytes and we asynchronously started replication, or we're storing
    // values and need to serialize and replicate them now:
    // 3) A replication factor above 1 (per the original note, the storage levels with the
    // "_2" suffix) means the block has to be replicated to other nodes.
    if (putLevel.replication > 1) {
    
    
      data match {
    
    
        case ByteBufferValues(bytes) =>
          if (replicationFuture != null) {
    
    
            Await.ready(replicationFuture, Duration.Inf)
          }
        case _ =>
          val remoteStartTime = System.currentTimeMillis
          // Serialize the block if not already done
          if (bytesAfterPut == null) {
    
    
            if (valuesAfterPut == null) {
    
    
              throw new SparkException(
                "Underlying put returned neither an Iterator nor bytes! This shouldn't happen.")
            }
            bytesAfterPut = dataSerialize(blockId, valuesAfterPut)
          }
          // TODO 3) the replication itself
          replicate(blockId, bytesAfterPut, putLevel)
          logDebug("Put block %s remotely took %s"
            .format(blockId, Utils.getUsedTimeMs(remoteStartTime)))
      }
    }

    BlockManager.dispose(bytesAfterPut)

    if (putLevel.replication > 1) {
    
    
      logDebug("Putting block %s with replication took %s"
        .format(blockId, Utils.getUsedTimeMs(startTimeMs)))
    } else {
    
    
      logDebug("Putting block %s without replication took %s"
        .format(blockId, Utils.getUsedTimeMs(startTimeMs)))
    }

    updatedBlocks
  }

=> 写入内存 MemoryStore.putBytes -> putIterator -> putArray -> tryToPut
  /**
   * Tries to store a block in memory.
   *
   * ensureFreeSpace is asked to make room first. If it succeeds, a MemoryEntry for the
   * block is added to `entries`; otherwise the block is handed to
   * blockManager.dropFromMemory.
   *
   * @param value        the block contents: an Array[Any] when `deserialized` is true,
   *                     a ByteBuffer otherwise
   * @param size         size of the block used for memory accounting
   * @param deserialized whether `value` holds deserialized values
   * @return whether the block was stored in memory, plus every block dropped on the way
   */
  private def tryToPut(
      blockId: BlockId,
      value: Any,
      size: Long,
      deserialized: Boolean): ResultWithDroppedBlocks = {

    /* TODO: Its possible to optimize the locking by locking entries only when selecting blocks
     * to be dropped. Once the to-be-dropped blocks have been selected, and lock on entries has
     * been released, it must be ensured that those to-be-dropped blocks are not double counted
     * for freeing up more space for another block that needs to be put. Only then the actually
     * dropping of blocks (and writing to disk if necessary) can proceed in parallel. */

    val droppedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]

    // The space check and the insertion happen under accountingLock. Per the original note,
    // without it another thread could insert data between the check and the put, and the
    // put could then run out of memory.
    val stored = accountingLock.synchronized {
      // ensureFreeSpace checks whether the block fits and, if not, drops other blocks
      // through dropFromMemory to make room.
      val spaceCheck = ensureFreeSpace(blockId, size)
      droppedBlocks ++= spaceCheck.droppedBlocks

      if (spaceCheck.success) {
        // Wrap the data in a MemoryEntry and record it in the entries map.
        val entry = new MemoryEntry(value, size, deserialized)
        entries.synchronized {
          entries.put(blockId, entry)
          currentMemory += size
        }
        val valuesOrBytes = if (deserialized) "values" else "bytes"
        logInfo("Block %s stored as %s in memory (estimated size %s, free %s)".format(
          blockId, valuesOrBytes, Utils.bytesToString(size), Utils.bytesToString(freeMemory)))
        true
      } else {
        // Tell the block manager that we couldn't put it in memory so that it can drop it to
        // disk if the block allows disk storage.
        val data =
          if (deserialized) Left(value.asInstanceOf[Array[Any]])
          else Right(value.asInstanceOf[ByteBuffer].duplicate())
        blockManager.dropFromMemory(blockId, data).foreach { status =>
          droppedBlocks += ((blockId, status))
        }
        false
      }
    }

    ResultWithDroppedBlocks(stored, droppedBlocks)
  }

==> ensureFreeSpace
  /**
   * Tries to make `space` bytes available for the block `blockIdToAdd`.
   *
   * If the free memory (minus memory used for unrolling) is too small, entries are selected
   * in the iteration order of `entries`, skipping blocks of the same RDD as the block being
   * added, until enough memory would be reclaimed; the selected blocks are then dropped
   * through blockManager.dropFromMemory.
   *
   * @return success = false when the block exceeds maxMemory or when not enough memory can
   *         be reclaimed; the dropped blocks are reported in either case
   */
  private def ensureFreeSpace(
      blockIdToAdd: BlockId,
      space: Long): ResultWithDroppedBlocks = {
    logInfo(s"ensureFreeSpace($space) called with curMem=$currentMemory, maxMem=$maxMemory")

    val droppedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]

    if (space > maxMemory) {
      logInfo(s"Will not store $blockIdToAdd as it is larger than our memory limit")
      ResultWithDroppedBlocks(success = false, droppedBlocks)
    } else {
      // Take into account the amount of memory currently occupied by unrolling blocks
      val actualFreeMemory = freeMemory - currentUnrollMemory

      if (actualFreeMemory >= space) {
        // Enough room already: nothing has to be dropped.
        ResultWithDroppedBlocks(success = true, droppedBlocks)
      } else {
        val rddToAdd = getRddId(blockIdToAdd)
        val victims = new ArrayBuffer[BlockId]
        var reclaimable = 0L

        // This is synchronized to ensure that the set of entries is not changed
        // (because of getValue or getBytes) while traversing the iterator, as that
        // can lead to exceptions.
        entries.synchronized {
          val cursor = entries.entrySet().iterator()
          // Keep selecting entries until dropping them would free enough memory.
          while (actualFreeMemory + reclaimable < space && cursor.hasNext) {
            val candidate = cursor.next()
            val candidateId = candidate.getKey
            if (rddToAdd.isEmpty || rddToAdd != getRddId(candidateId)) {
              victims += candidateId
              reclaimable += candidate.getValue.size
            }
          }
        }

        if (actualFreeMemory + reclaimable < space) {
          logInfo(s"Will not store $blockIdToAdd as it would require dropping another block " +
            "from the same RDD")
          ResultWithDroppedBlocks(success = false, droppedBlocks)
        } else {
          // Dropping the selected blocks frees enough memory for the new block.
          logInfo(s"${victims.size} blocks selected for dropping")
          victims.foreach { victimId =>
            val entry = entries.synchronized { entries.get(victimId) }
            // This should never be null as only one thread should be dropping
            // blocks and removing entries. However the check is still here for
            // future safety.
            if (entry != null) {
              val data =
                if (entry.deserialized) Left(entry.value.asInstanceOf[Array[Any]])
                else Right(entry.value.asInstanceOf[ByteBuffer].duplicate())
              // Per the original note, dropFromMemory tries to write the data to disk, and
              // the data is lost if the block's storage level does not allow disk.
              blockManager.dropFromMemory(victimId, data).foreach { status =>
                droppedBlocks += ((victimId, status))
              }
            }
          }
          ResultWithDroppedBlocks(success = true, droppedBlocks)
        }
      }
    }
  }

=> 写入磁盘 DiskStore.putBytes
  /**
   * Writes the given bytes to the disk file of a block through a file channel.
   *
   * The input buffer is duplicated first, so the caller's position and limit are not
   * modified. The channel is closed in a `finally` block so that the file handle is
   * released even when a write fails.
   *
   * @param blockId block whose file (from diskManager.getFile) is written
   * @param _bytes  data to write, from its current position to its limit
   * @param level   storage level of the block (not used by this method)
   * @return a PutResult carrying the buffer limit as size and a duplicate of the buffer
   */
  override def putBytes(blockId: BlockId, _bytes: ByteBuffer, level: StorageLevel): PutResult = {
    // So that we do not modify the input offsets !
    // duplicate does not copy buffer, so inexpensive
    val bytes = _bytes.duplicate()
    logDebug(s"Attempting to put block $blockId")
    val startTime = System.currentTimeMillis
    val file = diskManager.getFile(blockId)
    val channel = new FileOutputStream(file).getChannel
    try {
      // A single write may not drain the buffer, so loop until nothing remains.
      while (bytes.remaining > 0) {
        channel.write(bytes)
      }
    } finally {
      // Always release the file handle, including when a write throws.
      channel.close()
    }
    val finishTime = System.currentTimeMillis
    logDebug("Block %s stored as %s file on disk in %d ms".format(
      file.getName, Utils.bytesToString(bytes.limit), finishTime - startTime))
    PutResult(bytes.limit(), Right(bytes.duplicate()))
  }

猜你喜欢

转载自blog.csdn.net/m0_46449152/article/details/109558351