Giraph: The process of reading vertices

GraphTaskManager.execute

/**
  * Perform the work assigned to this compute node for this job run.
  * 1) Run checkpoint per frequency policy.
  * 2) For every vertex on this mapper, run the compute() function
  * 3) Wait until all messaging is done.
  * 4) Check if all vertices are done.  If not goto 2).
  * 5) Dump output.
  */
  public void execute() throws IOException, InterruptedException {
  // omit some lines
  serviceWorker.setup();
  // omit some lines
  }

The type of serviceWorker is BspServiceWorker.

serviceWorker = new BspServiceWorker<I, V, E>(context, this);

BspServiceWorker.setup()

public FinishedSuperstepStats setup() {
    // Unless doing a restart, prepare for computation:
    // 1. Start superstep INPUT_SUPERSTEP (no computation)
    // 2. Wait until the INPUT_SPLIT_ALL_READY_PATH node has been created
    // 3. Process input splits until there are no more.
    // 4. Wait until the INPUT_SPLIT_ALL_DONE_PATH node has been created
    // 5. Process any mutations deriving from add edge requests
    // 6. Wait for superstep INPUT_SUPERSTEP to complete.

 public FinishedSuperstepStats setup() {
     // omit some lines
     vertexEdgeCount = loadVertices();
      // omit some lines
 }

BspServiceWorker.loadVertices

/**
   * Load the vertex input of this worker: wrap the configured vertex input
   * format in a callable factory and hand that factory to
   * {@code loadInputSplits}.
   *
   * @return whatever {@code loadInputSplits} returns for the vertex input
   * @throws KeeperException propagated from {@code loadInputSplits}
   * @throws InterruptedException propagated from {@code loadInputSplits}
   */
private VertexEdgeCount loadVertices() throws KeeperException,
      InterruptedException {
    return loadInputSplits(
        new VertexInputSplitsCallableFactory<I, V, E>(
            getConfiguration().createWrappedVertexInputFormat(),
            getContext(),
            getConfiguration(),
            this,
            inputSplitsHandler));
  }

BspServiceWorker.loadInputSplits

/**
   * Load the vertices/edges from input splits until every InputSplit has
   * been processed.
   *
   * The configured number of loader threads is started through
   * {@link ProgressableUtils#getResultsWithNCallables}; each callable created
   * by the factory returns its own count, and the counts are added up here.
   * Before returning, the method waits for all outstanding requests of the
   * worker client.
   *
   * @param inputSplitsCallableFactory Factory for {@link InputSplitsCallable}s
   * @return Statistics of the vertices and edges loaded
   * @throws InterruptedException
   * @throws KeeperException
   */
private VertexEdgeCount loadInputSplits(
      CallableFactory<VertexEdgeCount> inputSplitsCallableFactory)
    throws KeeperException, InterruptedException {
    VertexEdgeCount total = new VertexEdgeCount();

    // One callable per loader thread; thread names follow "load-%d".
    List<VertexEdgeCount> perThreadCounts =
        ProgressableUtils.getResultsWithNCallables(
            inputSplitsCallableFactory,
            getConfiguration().getNumInputSplitsThreads(),
            "load-%d",
            getContext());
    for (VertexEdgeCount threadCount : perThreadCounts) {
      total = total.incrVertexEdgeCount(threadCount);
    }

    // Do not report the totals before every pending request has completed.
    workerClient.waitAllRequests();
    return total;
  }

ProgressableUtils.getResultsWithNCallables

/**
   * Create {#link numThreads} callables from {#link callableFactory},
   * execute them and gather results.
   */
  public static <R> List<R> getResultsWithNCallables(
      CallableFactory<R> callableFactory, int numThreads,
      String threadNameFormat, Progressable progressable) {
    ExecutorService executorService = Executors.newFixedThreadPool(numThreads,
        ThreadUtils.createThreadFactory(threadNameFormat));
    HashMap<Integer, Future<R>> futures = new HashMap<>(numThreads);
    for (int i = 0; i < numThreads; i++) {
      Callable<R> callable = callableFactory.newCallable(i);
      Future<R> future = executorService.submit(
          new LogStacktraceCallable<R>(callable));
      futures.put(i, future);
    }

InputSplitsCallable.call

InputSplitsCallable.call processes all the input splits, one after another.

/**
   * Keep reserving input splits from the splits handler and loading them
   * until the handler returns {@code null} (no splits left).
   *
   * @return accumulated counts of everything loaded by this callable; if the
   *         thread is interrupted, the counts accumulated so far
   * @throws IllegalStateException if loading a split fails with an
   *         {@code IOException} or {@code ClassNotFoundException}
   */
@Override
  public VertexEdgeCount call() {
    VertexEdgeCount vertexEdgeCount = new VertexEdgeCount();
    int inputSplitsProcessed = 0;
    try {

      while (true) {
        // The second argument is true only for the first reservation.
        byte[] serializedInputSplit = splitsHandler.reserveInputSplit(
            getInputType(), inputSplitsProcessed == 0);
        if (serializedInputSplit == null) {
          // No splits left
          break;
        }
        vertexEdgeCount = vertexEdgeCount.incrVertexEdgeCount(
            loadInputSplit(serializedInputSplit));
        context.progress();
        ++inputSplitsProcessed;
      }
    } catch (InterruptedException e) {
      // Stop loading, but restore the interrupt flag so that the code
      // running this callable can still observe the interruption.
      Thread.currentThread().interrupt();
    } catch (java.io.IOException | ClassNotFoundException e) {
      // loadInputSplit declares these checked exceptions while call() has no
      // throws clause, so surface them unchecked and keep the cause.
      throw new IllegalStateException(
          "call: Failed to load input split", e);
    }
    return vertexEdgeCount;
  }

InputSplitsCallable.loadInputSplit loads a single split

 /**
   * Turn the serialized form of one input split back into an
   * {@code InputSplit} and read it.
   *
   * @param serializedInputSplit serialized input split, as handed out by the
   *        splits handler
   * @return counts returned by {@code readInputSplit} for this split
   */
  private VertexEdgeCount loadInputSplit(byte[] serializedInputSplit)
      throws IOException, ClassNotFoundException, InterruptedException {
    return readInputSplit(getInputSplit(serializedInputSplit));
  }

VertexInputSplitsCallable.readInputSplit

/**
   * Read every vertex of one input split and send each vertex to the owner
   * of its partition.
   *
   * For each vertex: a missing value is replaced by a freshly created one,
   * the configuration is attached, the edges are translated when
   * {@code translateEdge} is set, and the vertex is passed to
   * {@code workerClientRequestProcessor.sendVertexRequest}.
   *
   * {@code inputSplitVerticesLoaded}, {@code inputSplitEdgesLoaded} and
   * {@code edgesSinceLastUpdate} are declared outside this excerpt.
   *
   * @param inputSplit split to read
   * @return vertex and edge counts built from the counters above
   * @throws IOException if creating, reading or closing the reader fails
   * @throws InterruptedException if interrupted while reading
   */
@Override
  protected VertexEdgeCount readInputSplit(
      InputSplit inputSplit) throws IOException, InterruptedException {
    VertexReader<I, V, E> vertexReader =
        vertexInputFormat.createVertexReader(inputSplit, context);

    vertexReader.initialize(inputSplit, context);

    try {
      while (vertexReader.nextVertex()) {
        Vertex<I, V, E> readerVertex = vertexReader.getCurrentVertex();

        if (readerVertex.getValue() == null) {
          readerVertex.setValue(configuration.createVertexValue());
        }
        readerVertex.setConf(configuration);

        ++inputSplitVerticesLoaded;

        // Before saving to partition-store translate all edges (if present)
        if (translateEdge != null) {
          // only iff vertexInput reads edges also
          if (readerVertex.getEdges() != null &&
              readerVertex.getNumEdges() > 0) {
            OutEdges<I, E> vertexOutEdges = configuration
                .createAndInitializeOutEdges(readerVertex.getNumEdges());

            for (Edge<I, E> edge : readerVertex.getEdges()) {
              vertexOutEdges.add(
                  configuration.createEdge(translateEdge, edge));
            }
            // set out edges to translated instance -> old instance is
            // released
            readerVertex.setEdges(vertexOutEdges);
          }
        }

        PartitionOwner partitionOwner =
            bspServiceWorker.getVertexPartitionOwner(readerVertex.getId());
        workerClientRequestProcessor.sendVertexRequest(
            partitionOwner, readerVertex);
        edgesSinceLastUpdate += readerVertex.getNumEdges();
      }
    } finally {
      // Close the reader even when reading or sending a vertex fails, so the
      // underlying input is not leaked.
      vertexReader.close();
    }

    WorkerProgress.get().addVerticesLoaded(
        inputSplitVerticesLoaded % VERTICES_UPDATE_PERIOD);
    WorkerProgress.get().incrementVertexInputSplitsLoaded();

    return new VertexEdgeCount(inputSplitVerticesLoaded,
        inputSplitEdgesLoaded + edgesSinceLastUpdate, 0);
  }

NettyWorkerClientRequestProcessor.sendVertexRequest


  /**
   * Put a vertex into the per-partition send cache and, once the cached data
   * for the owning worker has reached {@code maxVerticesSizePerWorker},
   * flush that worker's cached vertices as one request.
   *
   * @param partitionOwner owner of the partition the vertex belongs to
   * @param vertex vertex to send
   * @return true if a request was issued by this call, false if the vertex
   *         was only cached
   */
  @Override
  public boolean sendVertexRequest(PartitionOwner partitionOwner,
      Vertex<I, V, E> vertex) {
    int cachedSizeForWorker =
        sendPartitionCache.addVertex(partitionOwner, vertex);

    // Not enough data cached for this worker yet: keep accumulating.
    if (cachedSizeForWorker < maxVerticesSizePerWorker) {
      return false;
    }

    // Take everything cached for the worker out of the cache and ship it.
    PairList<Integer, ExtendedDataOutput> cachedVertices =
        sendPartitionCache.removeWorkerData(partitionOwner.getWorkerInfo());
    WritableRequest flushRequest = new SendWorkerVerticesRequest<I, V, E>(
        configuration, cachedVertices);
    doRequest(partitionOwner.getWorkerInfo(), flushRequest);
    return true;
  }

The vertex reader reads each vertex together with its edge information; the vertex is first stored in the local send cache.
cache size: 629145.
maxVerticesSizePerWorker: default 512k

 /**
   * Deliver a request to a worker: executed directly against
   * {@code serverData} when the target is this worker (same task id),
   * otherwise sent through the worker client. The matching request counter
   * is incremented in either case.
   *
   * @param workerInfo worker the request is addressed to
   * @param writableRequest request to deliver
   */
 public void doRequest(WorkerInfo workerInfo,
                         WritableRequest writableRequest) {
    boolean targetIsThisWorker =
        serviceWorker.getWorkerInfo().getTaskId() == workerInfo.getTaskId();

    if (!targetIsThisWorker) {
      workerClient.sendWritableRequest(
          workerInfo.getTaskId(), writableRequest);
      remoteRequests.inc();
      return;
    }

    // Local target: no network round trip, run the request in place.
    ((WorkerRequest) writableRequest).doRequest(serverData);
    localRequests.inc();
  }

Target worker

The target worker receives the request and calls SendWorkerVerticesRequest.doRequest.

SendWorkerVerticesRequest.doRequest

/**
   * Walk over the (partition id, serialized vertices) pairs carried by this
   * request and add each pair to the partition store of the server data.
   *
   * @param serverData server data whose partition store receives the
   *        vertices
   */
public void doRequest(ServerData<I, V, E> serverData) {
    for (PairList<Integer, ExtendedDataOutput>.Iterator cursor =
             workerPartitions.getIterator(); cursor.hasNext();) {
      cursor.next();
      serverData.getPartitionStore().addPartitionVertices(
          cursor.getCurrentFirst(), cursor.getCurrentSecond());
    }
  }

If DiskBackedPartitionStore is used, it will call DiskBackedPartitionStore.addEntry.

DiskBackedPartitionStore.addPartitionVertices

/**
   * Add serialized vertices to a partition while holding the read lock of
   * that partition.
   *
   * @param partitionId id of the partition the vertices belong to
   * @param extendedDataOutput serialized vertices to add
   */
@Override
  public void addPartitionVertices(Integer partitionId,
                                   ExtendedDataOutput extendedDataOutput) {
    ReadWriteLock rwLock = getPartitionLock(partitionId);
    rwLock.readLock().lock();
    try {
      addEntry(partitionId, extendedDataOutput);
    } finally {
      // Release the lock even if addEntry throws; otherwise a later
      // writeLock() on this partition could never be acquired.
      rwLock.readLock().unlock();
    }
  }

DiskBackedPartitionStore.addEntry

addEntry checks whether hasPartitionDataOnDisk contains the partition. At first it does not, so addEntryToInMemoryPartitionData is called.

/**
   * Add an entry for a partition under the partition's read lock. When the
   * partition is not listed in {@code hasPartitionDataOnDisk}, the entry is
   * added to the in-memory data of the partition.
   *
   * @param partitionId id of the partition the entry belongs to
   * @param entry entry to add
   */
protected void addEntry(int partitionId, T entry) {

    ReadWriteLock rwLock = getPartitionLock(partitionId);
    rwLock.readLock().lock();
    try {
      if (hasPartitionDataOnDisk.contains(partitionId)) {
      // omit some lines
      } else {
        addEntryToInMemoryPartitionData(partitionId, entry);
      }
    } finally {
      // Always release the read lock, including when adding the entry fails.
      rwLock.readLock().unlock();
    }
  }

DiskBackedPartitionStore.addEntryToInMemoryPartitionData

 /**
   * Add serialized vertices to the in-memory partition store. A partition
   * the store does not have yet is first registered with the meta partition
   * manager of the out-of-core engine.
   *
   * @param partitionId id of the partition
   * @param vertices serialized vertices to add
   */
 @Override
  protected void addEntryToInMemoryPartitionData(int partitionId,
                                                 ExtendedDataOutput vertices) {
    boolean alreadyStored = partitionStore.hasPartition(partitionId);
    if (!alreadyStored) {
      oocEngine.getMetaPartitionManager().addPartition(partitionId);
    }
    partitionStore.addPartitionVertices(partitionId, vertices);
  }

MetaPartitionManager manages the partition meta information.
partitionStore stores the vertex data.

MetaPartitionManager.addPartition

 /**
   * Register meta information for a partition. Nothing happens when the
   * partition is already registered; otherwise the partition gets the next
   * index, is added to the dictionary of its owner thread, and the counter
   * of in-memory partitions is incremented.
   *
   * @param partitionId id of the partition to register
   */
 public void addPartition(int partitionId) {
    MetaPartition candidate = new MetaPartition(partitionId);
    if (partitions.putIfAbsent(partitionId, candidate) != null) {
      // Another call already registered this partition.
      return;
    }

    int index = indexCounter.getAndIncrement();
    checkState(partitionIndex.putIfAbsent(partitionId, index) == null);
    // The owner thread is derived from the index stored just above.
    perThreadPartitionDictionary.get(getOwnerThreadId(partitionId))
        .addPartition(candidate);
    numInMemoryPartitions.getAndIncrement();
  }

MetaPartitionManager.addPartition assigns the partition to a specific IO thread and increments numInMemoryPartitions, so that OutOfCoreCallable can offload the partition asynchronously. Please refer to "The process of OutOfCoreCallable".

  /**
   * Look up which thread is responsible for a partition: the index stored
   * for the partition, modulo the number of IO threads.
   *
   * @param partitionId id of the given partition; an index must already be
   *        stored for it, otherwise the state check fails
   * @return id of the thread responsible for the given partition
   */
  public int getOwnerThreadId(int partitionId) {
    final Integer storedIndex = partitionIndex.get(partitionId);
    checkState(storedIndex != null);
    return storedIndex % numIOThreads;
  }

perThreadPartitionDictionary maintains, for each IO thread, the partitions owned by that thread.

MetaPartition

/**
     * Create meta information for a partition: it starts out unprocessed,
     * with the partition itself and both message states marked as in memory.
     *
     * @param partitionId id of the partition
     */
public MetaPartition(int partitionId) {
      this.partitionId = partitionId;
      // Storage states: everything begins in memory.
      partitionState = StorageState.IN_MEM;
      currentMessagesState = StorageState.IN_MEM;
      incomingMessagesState = StorageState.IN_MEM;
      // Processing state.
      processingState = ProcessingState.UNPROCESSED;
    }

The type of partitionStore in DiskBackedPartitionStore is SimplePartitionStore

SimplePartitionStore.addPartitionVertices

/**
   * Deserialize the given vertices through a {@code VertexIterator}, add
   * them to the partition with the given id (created if it does not exist
   * yet), and put the partition back.
   *
   * @param partitionId id of the partition
   * @param extendedDataOutput serialized vertices
   */
@Override
  public void addPartitionVertices(Integer partitionId,
      ExtendedDataOutput extendedDataOutput) {
    VertexIterator<I, V, E> incomingVertices =
        new VertexIterator<I, V, E>(extendedDataOutput, conf);
    Partition<I, V, E> target = getOrCreatePartition(partitionId);

    target.addPartitionVertices(incomingVertices);
    putPartition(target);
  }

getOrCreatePartition

 /**
   * Return the partition stored under the given id, creating and storing a
   * new one when none exists. If another partition gets stored under the
   * same id in the meantime, that one is returned instead of the new one.
   *
   * @param partitionId id of the partition
   * @return the partition stored for the id
   */
 private Partition<I, V, E> getOrCreatePartition(Integer partitionId) {
    Partition<I, V, E> existing = partitions.get(partitionId);
    if (existing != null) {
      return existing;
    }

    Partition<I, V, E> created = conf.createPartition(partitionId, context);
    Partition<I, V, E> storedMeanwhile =
        partitions.putIfAbsent(partitionId, created);
    return storedMeanwhile == null ? created : storedMeanwhile;
  }

The type of Partition is SimplePartition, SimplePartition extends BasicPartition.

BasicPartition.addPartitionVertices

/**
   * Add every vertex produced by the iterator to this partition via
   * {@code putOrCombine}.
   *
   * @param vertexIterator iterator over the vertices to add
   */
public void addPartitionVertices(VertexIterator<I, V, E> vertexIterator) {
    while (vertexIterator.hasNext()) {
      vertexIterator.next();
      boolean wasPut = putOrCombine(vertexIterator.getVertex());
      if (wasPut) {
        // The partition now holds this vertex object, so the iterator has
        // to let go of it; otherwise the iterator reuses the object as an
        // optimization.
        vertexIterator.releaseVertex();
      }
    }
  }

public boolean putOrCombine(Vertex<I, V, E> vertex) {
    Vertex<I, V, E> originalVertex = vertexMap.get(vertex.getId());
    if (originalVertex == null) {
      originalVertex =
          vertexMap.putIfAbsent(vertex.getId(), vertex);
      if (originalVertex == null) {
        return true;
      }
    }

SimplePartition uses a vertexMap to store the vertices of the partition.

  private ConcurrentMap<I, Vertex<I, V, E>> vertexMap;

offload the partition

If the partition is in memory, the background IOCallable thread can offload the partition. But what happens if the partition has already been offloaded?

DiskBackedDataStore.addEntry

If hasPartitionDataOnDisk.contains(partitionId) returns true, the entry is added to dataBuffers, which is checked later by OutOfCoreCallable (see "The process of OutOfCoreCallable").

// The partition lock is acquired first.
if (hasPartitionDataOnDisk.contains(partitionId)) {
      List<T> entryList = new ArrayList<>();
      entryList.add(entry);
      int entrySize = entrySerializedSize(entry);
      MutablePair<Integer, List<T>> newPair =
          new MutablePair<>(entrySize, entryList);
      Pair<Integer, List<T>> oldPair =
          dataBuffers.putIfAbsent(partitionId, newPair);
      if (oldPair != null) {
        synchronized (oldPair) {
          newPair = (MutablePair<Integer, List<T>>) oldPair;
          newPair.setLeft(oldPair.getLeft() + entrySize);
          newPair.getRight().add(entry);
        }
      }
    }

OutOfCoreIOScheduler.getNextIOCommand

The actions returned by FixedPartitionOracle.getNextIOActions always contain STORE_MESSAGES_AND_BUFFERS.

IOAction.STORE_MESSAGES_AND_BUFFERS};

case STORE_MESSAGES_AND_BUFFERS:
          partitionId = oocEngine.getMetaPartitionManager()
              .getOffloadPartitionBufferId(threadId);

MetaPartitionManager.getOffloadPartitionBufferId

/**
   * Pick a partition whose vertex/edge buffers the given IO thread should
   * offload. Only applies during the input superstep.
   *
   * The per-thread set of pending partition ids is consulted first; when it
   * yields nothing, it is refilled with the candidates reported by the
   * partition store and the edge store, and consulted once more.
   *
   * @param threadId id of the IO thread asking for work
   * @return id of a partition to offload buffers for, or null if there is
   *         none or the current superstep is not the input superstep
   */
public Integer getOffloadPartitionBufferId(int threadId) {
    if (oocEngine.getSuperstep() != BspServiceWorker.INPUT_SUPERSTEP) {
      return null;
    }

    Integer pendingId = popFromSet(perThreadVertexEdgeBuffers.get(threadId));
    if (pendingId != null) {
      return pendingId;
    }

    // Nothing pending for this thread: refill from both stores.
    DiskBackedPartitionStore<?, ?, ?> partitionStore =
        (DiskBackedPartitionStore<?, ?, ?>) (oocEngine.getServerData()
            .getPartitionStore());
    perThreadVertexEdgeBuffers.get(threadId)
        .addAll(partitionStore.getCandidateBuffersToOffload(threadId));
    DiskBackedEdgeStore<?, ?, ?> edgeStore =
        (DiskBackedEdgeStore<?, ?, ?>) (oocEngine.getServerData())
            .getEdgeStore();
    perThreadVertexEdgeBuffers.get(threadId)
        .addAll(edgeStore.getCandidateBuffersToOffload(threadId));

    return popFromSet(perThreadVertexEdgeBuffers.get(threadId));
  }

DiskBackedDataStore.getCandidateBuffersToOffload

The default value of minBufferSizeToOffload is 8 MB, so a buffer smaller than 8 MB will not be stored on disk; you can set it to 1 KB to trace the process. The result contains the ids of the partitions to be processed.

/**
   * Collect the ids of the partitions whose buffered data is larger than
   * {@code minBufferSizeToOffload} and which are owned by the given IO
   * thread.
   *
   * @param ioThreadId id of the IO thread
   * @return ids of the partitions whose buffers are candidates for
   *         offloading; empty if there are none
   */
public Set<Integer> getCandidateBuffersToOffload(int ioThreadId) {
    Set<Integer> candidates = new HashSet<>();
    for (Map.Entry<Integer, Pair<Integer, List<T>>> buffered :
        dataBuffers.entrySet()) {
      int partitionId = buffered.getKey();
      long bufferedSize = buffered.getValue().getLeft();
      if (bufferedSize <= minBufferSizeToOffload) {
        // Too small to be worth offloading.
        continue;
      }
      int ownerThreadId =
          oocEngine.getMetaPartitionManager().getOwnerThreadId(partitionId);
      if (ownerThreadId == ioThreadId) {
        candidates.add(partitionId);
      }
    }
    return candidates;
  }

StoreDataBufferIOCommand

When StoreDataBufferIOCommand is executed, the PARTITION case offloads the buffers of both the partition store and the edge store:

 case PARTITION:
        DiskBackedPartitionStore partitionStore =
            (DiskBackedPartitionStore)
                oocEngine.getServerData().getPartitionStore();
        numBytesTransferred +=
            partitionStore.offloadBuffers(partitionId);
        DiskBackedEdgeStore edgeStore =
            (DiskBackedEdgeStore) oocEngine.getServerData().getEdgeStore();
        numBytesTransferred += edgeStore.offloadBuffers(partitionId);

DiskBackedPartitionStore.offloadBuffers

 /**
   * Offload the buffers of a partition, using a data index that starts with
   * the PARTITION type entry.
   *
   * @param partitionId id of the partition whose buffers are offloaded
   * @return the value returned by {@code offloadBuffersProxy}
   * @throws IOException propagated from {@code offloadBuffersProxy}
   */
 @Override
  public long offloadBuffers(int partitionId)
      throws IOException {
    DataIndex index =
        new DataIndex().addIndex(DataIndex.TypeIndexEntry.PARTITION);
    return offloadBuffersProxy(partitionId, index);
  }

DiskBackedDataStore.offloadBuffersProxy

It first checks whether pair.getLeft() < minBufferSizeToOffload; if so (or if there is no buffer for the partition), it returns 0 without writing anything.

/**
   * Write the buffered entries of a partition through the out-of-core data
   * accessor and record how many buffers were written for that partition.
   *
   * Returns 0 without writing anything when no buffer exists for the
   * partition or when the buffer's recorded size is below
   * {@code minBufferSizeToOffload}.
   *
   * @param partitionId id of the partition whose buffers are offloaded
   * @param index index to write under; a partition entry and a BUFFER entry
   *        are appended for the duration of the write and removed again
   *        before returning
   * @return the value returned by {@code finalizeOutput()} of the output
   *         wrapper, or 0 if nothing was written
   * @throws IOException declared for the write path
   */
protected long offloadBuffersProxy(int partitionId, DataIndex index)
      throws IOException {
    Pair<Integer, List<T>> pair = dataBuffers.get(partitionId);
    if (pair == null || pair.getLeft() < minBufferSizeToOffload) {
      return 0;
    }
    // NOTE(review): the size check above runs before the write lock is
    // taken, so the buffer could in principle be removed by someone else in
    // between, in which case checkNotNull below fails. Presumably only the
    // owning IO thread calls this for a given partition -- confirm.
    ReadWriteLock rwLock = getPartitionLock(partitionId);
    // The write lock is held only while the buffer is detached from
    // dataBuffers; the actual write below runs without it.
    rwLock.writeLock().lock();
    pair = dataBuffers.remove(partitionId);
    rwLock.writeLock().unlock();
    checkNotNull(pair);
    checkState(!pair.getRight().isEmpty());
    int ioThreadId =
        oocEngine.getMetaPartitionManager().getOwnerThreadId(partitionId);
    // Extend the caller's index with the partition and BUFFER entries; a
    // copy of it is handed to the data accessor.
    index.addIndex(NumericIndexEntry.createPartitionEntry(partitionId))
        .addIndex(DataIndex.TypeIndexEntry.BUFFER);
    OutOfCoreDataAccessor.DataOutputWrapper outputWrapper =
        oocEngine.getDataAccessor().prepareOutput(ioThreadId, index.copy(),
            true);
    for (T entry : pair.getRight()) {
      writeEntry(entry, outputWrapper.getDataOutput());
    }
    long numBytes = outputWrapper.finalizeOutput();
    // Undo the two addIndex calls so the caller's index is back to what was
    // passed in.
    index.removeLastIndex().removeLastIndex();
    int numBuffers = pair.getRight().size();
    // Accumulate the number of buffers written for this partition.
    // NOTE(review): putIfAbsent followed by replace is not a single atomic
    // step; presumably safe because one IO thread owns the partition --
    // confirm.
    Integer oldNumBuffersOnDisk =
        numDataBuffersOnDisk.putIfAbsent(partitionId, numBuffers);
    if (oldNumBuffersOnDisk != null) {
      numDataBuffersOnDisk.replace(partitionId,
          oldNumBuffersOnDisk + numBuffers);
    }
    return numBytes;
  }