YARN-ApplicationMaster启动流程

一、流程图

在这里插入图片描述
基于Hadoop2.6版本

二、状态转化

  • RMAppImpl 状态转化
  1. NEW -> NEW_SAVING (STEP 1)
  2. NEW_SAVING -> SUBMITTED (STEP 3)
  3. SUBMITTED -> ACCEPTED (STEP 5)
  4. ACCEPTED -> RUNNING (STEP 19)
  • RMAppAttemptImpl 状态转化
  1. NEW -> SUBMITTED (STEP 6)
  2. SUBMITTED -> SCHEDULED (STEP 8)
  3. SCHEDULED -> ALLOCATED_SAVING (STEP 11)
  4. ALLOCATED_SAVING -> ALLOCATED (STEP 14)
  5. ALLOCATED -> LAUNCHED (STEP 16)
  6. LAUNCHED -> RUNNING (STEP 18)
  • RMContainerImpl 状态转化
  1. NEW -> ALLOCATED (STEP 10)
  2. ALLOCATED -> ACQUIRED (STEP 12)
  3. ACQUIRED -> RUNNING (STEP 15)

三、代码流程

1、ClientRMService#submitApplication && 向 RMAppImpl 发送RMAppEventType.START事件

// ClientRMService.java
// Excerpt of ClientRMService#submitApplication: forwards the submission to
// RMAppManager, passing the current time as the submit time, then logs the
// application id and the submitting user.
// NOTE(review): the code that obtains submissionContext, applicationId and user,
// the catch clause and the return statement are not shown in this excerpt.
public SubmitApplicationResponse submitApplication(
      SubmitApplicationRequest request) throws YarnException {
    try {
      // call RMAppManager to submit application directly
      rmAppManager.submitApplication(submissionContext,
          System.currentTimeMillis(), user);

      LOG.info("Application with id " + applicationId.getId() + 
          " submitted by user " + user);
    }
}

// RMAppManager.java
// Excerpt of RMAppManager#submitApplication: builds the RMAppImpl through
// createAndPopulateNewRMApp and, when security is not enabled, dispatches
// RMAppEventType.START for the application.
// NOTE(review): the security-enabled branch, the definition of applicationId and
// the catch clause are elided in this excerpt.
protected void submitApplication(
      ApplicationSubmissionContext submissionContext, long submitTime,
      String user) throws YarnException {
    RMAppImpl application =
        createAndPopulateNewRMApp(submissionContext, submitTime, user, false);
    try {
      if (UserGroupInformation.isSecurityEnabled()) {
        ...
      } else {
        // Send the START event to RMAppImpl
        this.rmContext.getDispatcher().getEventHandler()
            .handle(new RMAppEvent(applicationId, RMAppEventType.START));
      }
    }
}

2、RMAppImpl 处理 START 事件 && 向 RMStateStore 发送RMStateStoreEventType.STORE_APP事件

// RMAppImpl.java
// On the START event, RMAppNewlySavingTransition is invoked and the RMAppImpl
// state changes from NEW to NEW_SAVING.
  .addTransition(RMAppState.NEW, RMAppState.NEW_SAVING,
        RMAppEventType.START, new RMAppNewlySavingTransition())

// RMAppNewlySavingTransition: asks the RMStateStore to save the RMAppImpl's
// current information.
  private static final class RMAppNewlySavingTransition extends RMAppTransition {
    // Logs the application id, then hands the app to the state store obtained
    // from the app's RM context.
    public void transition(RMAppImpl app, RMAppEvent event) {
      LOG.info("Storing application with id " + app.applicationId);
      app.rmContext.getStateStore().storeNewApplication(app);
    }
  }
// RMStateStore.java
  // Excerpt: wraps the application state in an RMStateStoreAppEvent and hands it
  // to the store's dispatcher.
  // NOTE(review): the construction of appState from `app` is elided (the `...`).
  public void storeNewApplication(RMApp app) {
    ...
    dispatcher.getEventHandler().handle(new RMStateStoreAppEvent(appState));
  }
// RMStateStoreAppEvent.java
  // Event carrying the application state to be stored.
  public RMStateStoreAppEvent(ApplicationStateData appState) {
    // The event type is fixed to RMStateStoreEventType.STORE_APP; this is the
    // event that RMAppImpl's save request sends to RMStateStore.
    super(RMStateStoreEventType.STORE_APP);
    this.appState = appState;
  }

3、RMStateStore 处理 STORE_APP 事件 && 向 RMAppImpl 发送RMAppEventType.APP_NEW_SAVED事件

// RMStateStore.java
  // STORE_APP keeps the store in the DEFAULT state and runs StoreAppTransition.
  .addTransition(RMStateStoreState.DEFAULT, RMStateStoreState.DEFAULT,
          RMStateStoreEventType.STORE_APP, new StoreAppTransition())

  // Persists the application state and then notifies the application that it
  // has been saved.
  // NOTE(review): appId and appState are not declared in this excerpt;
  // presumably they are extracted from `event` in lines that were omitted.
  private static class StoreAppTransition
      implements SingleArcTransition<RMStateStore, RMStateStoreEvent> {
    @Override
    public void transition(RMStateStore store, RMStateStoreEvent event) {
      LOG.info("Storing info for app: " + appId);
      try {
        store.storeApplicationStateInternal(appId, appState);
        // Send the APP_NEW_SAVED event to RMAppImpl
        store.notifyApplication(new RMAppEvent(appId,
               RMAppEventType.APP_NEW_SAVED));
      } catch (Exception e) {
        // Any failure is logged and reported through notifyStoreOperationFailed.
        LOG.error("Error storing app: " + appId, e);
        store.notifyStoreOperationFailed(e);
      }
    };
  }

4、RMAppImpl 处理 APP_NEW_SAVED 事件 && 向 ResourceManager 发送SchedulerEventType.APP_ADDED事件

// RMAppImpl.java
  // APP_NEW_SAVED moves RMAppImpl from NEW_SAVING to SUBMITTED and runs
  // AddApplicationToSchedulerTransition.
  .addTransition(RMAppState.NEW_SAVING, RMAppState.SUBMITTED,
        RMAppEventType.APP_NEW_SAVED, new AddApplicationToSchedulerTransition())
  // Emits an AppAddedSchedulerEvent built from the application id, the queue
  // and reservation id taken from the submission context, and the user.
  // NOTE(review): this call passes four arguments while the constructor quoted
  // after it takes five; presumably a four-argument overload exists that is
  // not shown here.
  private static final class AddApplicationToSchedulerTransition extends
      RMAppTransition {
    @Override
    public void transition(RMAppImpl app, RMAppEvent event) {
      app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
        app.submissionContext.getQueue(), app.user,
        app.submissionContext.getReservationID()));
    }
  }
  // Scheduler event describing a newly added application: stores the
  // application id, queue, user, reservation id and the recovering flag.
  public AppAddedSchedulerEvent(ApplicationId applicationId, String queue,
      String user, boolean isAppRecovering, ReservationId reservationID) {
    // The event type is fixed to SchedulerEventType.APP_ADDED, which the
    // article describes as being sent to the ResourceManager.
    super(SchedulerEventType.APP_ADDED);
    this.applicationId = applicationId;
    this.queue = queue;
    this.user = user;
    this.reservationID = reservationID;
    this.isAppRecovering = isAppRecovering;
  }

5、ResourceManager 处理 APP_ADDED 事件 && 向 RMAppImpl 发送RMAppEventType.APP_ACCEPTED事件

// FairScheduler.java
  // Excerpt of FairScheduler#handle showing only the APP_ADDED case: the event
  // is type-checked, cast to AppAddedSchedulerEvent and its fields are passed
  // to addApplication.
  // NOTE(review): the switch statement and the other cases are elided (`..`).
  public void handle(SchedulerEvent event) {
    ..
    case APP_ADDED:
      if (!(event instanceof AppAddedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
      // Enter addApplication
      addApplication(appAddedEvent.getApplicationId(),
        appAddedEvent.getQueue(), appAddedEvent.getUser(),
        appAddedEvent.getIsAppRecovering());
      break;
  }
  
  // Excerpt of the scheduler's addApplication: when the application is not
  // recovering, an APP_ACCEPTED event is dispatched for it.
  // NOTE(review): the preceding bookkeeping and the recovering branch are
  // elided (`...` / `..`).
  private synchronized void addApplication(ApplicationId applicationId,
    String queueName, String user, boolean isAppRecovering) {
    ...
    if (isAppRecovering) {
      ..
    } else {
    // Send the APP_ACCEPTED event to RMAppImpl
      rmContext.getDispatcher().getEventHandler()
        .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
    }
  }

6、 RMAppImpl 处理APP_ACCEPTED事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.START事件

// RMAppImpl.java
    // APP_ACCEPTED moves RMAppImpl from SUBMITTED to ACCEPTED and runs
    // StartAppAttemptTransition.
    .addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED,
        RMAppEventType.APP_ACCEPTED, new StartAppAttemptTransition())
  // Creates and starts a new attempt, passing false for
  // transferStateFromPreviousAttempt.
  private static final class StartAppAttemptTransition extends RMAppTransition {
    @Override
    public void transition(RMAppImpl app, RMAppEvent event) {
      app.createAndStartNewAttempt(false);
    };
  }
  // Creates a new attempt, then emits an RMAppStartAttemptEvent for the current
  // attempt's id, forwarding the transferStateFromPreviousAttempt flag.
  private void
      createAndStartNewAttempt(boolean transferStateFromPreviousAttempt) {
    createNewAttempt();
    handler.handle(new RMAppStartAttemptEvent(currentAttempt.getAppAttemptId(),
      transferStateFromPreviousAttempt));
  }
  // Attempt event that additionally records whether state should be
  // transferred from the previous attempt.
  public RMAppStartAttemptEvent(ApplicationAttemptId appAttemptId,
      boolean transferStateFromPreviousAttempt) {
    // The event type is fixed to RMAppAttemptEventType.START, addressed to
    // RMAppAttemptImpl.
    super(appAttemptId, RMAppAttemptEventType.START);
    this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt;
  }

7、RMAppAttemptImpl 处理 START事件 && 向 ResourceManager 发送SchedulerEventType.APP_ATTEMPT_ADDED事件

// RMAppAttemptImpl.java
  // START moves RMAppAttemptImpl from NEW to SUBMITTED and runs
  // AttemptStartedTransition.
  .addTransition(RMAppAttemptState.NEW, RMAppAttemptState.SUBMITTED,RMAppAttemptEventType.START, 
  new AttemptStartedTransition())

  // Registers the attempt with the master service and announces it to the
  // scheduler through an AppAttemptAddedSchedulerEvent.
  // NOTE(review): transferStateFromPreviousAttempt is not declared in this
  // excerpt; presumably it is read from `event` in lines that were omitted.
  // The call also passes two arguments while the constructor quoted after it
  // takes three, so an overload is presumably not shown.
  private static final class AttemptStartedTransition extends BaseTransition {
	@Override
    public void transition(RMAppAttemptImpl appAttempt,
        RMAppAttemptEvent event) {
      // Register with the ApplicationMasterService
      appAttempt.masterService
          .registerAppAttempt(appAttempt.applicationAttemptId);
      // Add the applicationAttempt to the scheduler and inform the scheduler
      // whether to transfer the state from previous attempt.
      appAttempt.eventHandler.handle(new AppAttemptAddedSchedulerEvent(
        appAttempt.applicationAttemptId, transferStateFromPreviousAttempt));
    }
  }
  // Scheduler event describing a newly added application attempt: stores the
  // attempt id, the transfer-state flag and the recovering flag.
  public AppAttemptAddedSchedulerEvent(
      ApplicationAttemptId applicationAttemptId,
      boolean transferStateFromPreviousAttempt,
      boolean isAttemptRecovering) {
    // The event type is fixed to SchedulerEventType.APP_ATTEMPT_ADDED, which
    // the article describes as being sent to the ResourceManager.
    super(SchedulerEventType.APP_ATTEMPT_ADDED);
    this.applicationAttemptId = applicationAttemptId;
    this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt;
    this.isAttemptRecovering = isAttemptRecovering;
  }

8、ResourceManager 处理 APP_ATTEMPT_ADDED 事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.ATTEMPT_ADDED事件

// FairScheduler.java
  // Excerpt of FairScheduler#handle showing only the APP_ATTEMPT_ADDED case:
  // the event's attempt id and two flags are passed to addApplicationAttempt.
  // NOTE(review): the switch statement, the cast that produces
  // appAttemptAddedEvent and the `break` are not shown in this excerpt.
  public void handle(SchedulerEvent event) {
    ..
    case APP_ATTEMPT_ADDED:
      addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
        appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
        appAttemptAddedEvent.getIsAttemptRecovering());
  }
  // Excerpt of the scheduler's addApplicationAttempt: when the attempt is not
  // recovering, an ATTEMPT_ADDED event is dispatched for it.
  // NOTE(review): the recovering branch is shown empty here and
  // transferStateFromPreviousAttempt is unused, so the body is presumably
  // abridged.
  protected synchronized void addApplicationAttempt(
      ApplicationAttemptId applicationAttemptId,
      boolean transferStateFromPreviousAttempt,
      boolean isAttemptRecovering) {  
    if (isAttemptRecovering) {
    } else {
      // Send the ATTEMPT_ADDED event to RMAppAttemptImpl
      rmContext.getDispatcher().getEventHandler().handle(
        new RMAppAttemptEvent(applicationAttemptId,
            RMAppAttemptEventType.ATTEMPT_ADDED));
    }
  }

9、RMAppAttemptImpl 处理 ATTEMPT_ADDED 事件 && appAttempt.scheduler.allocate

// RMAppAttemptImpl.java
    // ATTEMPT_ADDED takes RMAppAttemptImpl from SUBMITTED to either
    // LAUNCHED_UNMANAGED_SAVING or SCHEDULED; ScheduleTransition picks which.
    .addTransition(RMAppAttemptState.SUBMITTED, 
          EnumSet.of(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING,
                     RMAppAttemptState.SCHEDULED), RMAppAttemptEventType.ATTEMPT_ADDED,
          new ScheduleTransition())
  // For a managed AM, submits the AM container request to the scheduler and
  // reports SCHEDULED as the next state.
  // NOTE(review): only the SCHEDULED return is shown; the path that would
  // return LAUNCHED_UNMANAGED_SAVING for an unmanaged AM is presumably elided.
  public static final class ScheduleTransition
      implements
      MultipleArcTransition<RMAppAttemptImpl, RMAppAttemptEvent, RMAppAttemptState> {
    @Override
    public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
        RMAppAttemptEvent event) {
      // Check whether the RM should manage the AM's execution. For an unmanaged
      // AM the RM does not allocate a container for the AM or launch it
      // (per the original note, the flag defaults to false).
      ApplicationSubmissionContext subCtx = appAttempt.submissionContext;
      if (!subCtx.getUnmanagedAM()) {
        // AM resource has been checked when submission
        // <<<<<< ask the scheduler to allocate resources >>>>>>>
        Allocation amContainerAllocation =
            appAttempt.scheduler.allocate(
                appAttempt.applicationAttemptId,
                Collections.singletonList(appAttempt.amReq),
                EMPTY_CONTAINER_RELEASE_LIST,
                amBlacklist.getAdditions(),
                amBlacklist.getRemovals());
      }
      // Return the SCHEDULED state
      return RMAppAttemptState.SCHEDULED;
    }
  }

10、从 RMAppAttemptImpl的 allocate分配AM资源 && 向RMContainerImpl发送START事件

对于FairScheduler调度器处理过程简单描述:

  • 1、收到NODE_UPDATE事件
  • 2、FairScheduler#nodeUpdate -> attemptScheduling
    Resource assignment = queueMgr.getRootQueue().assignContainer(node)
  • 3、FSParentQueue#assignContainer
    assigned = child.assignContainer(node);
  • 4、FSLeafQueue#assignContainer
    assigned = sched.assignContainer(node);
  • 5、FSAppAttempt#assignContainer
    RMContainer allocatedContainer = allocate(type, node, request.getPriority(), request, container);
  • 6、FSAppAttempt#allocate
    rmContainer.handle(new RMContainerEvent(container.getId(), RMContainerEventType.START));

11、RMContainerImpl 处理 START 事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.CONTAINER_ALLOCATED事件

// RMContainerImpl.java
  // START moves RMContainerImpl from NEW to ALLOCATED and runs
  // ContainerStartedTransition.
  .addTransition(RMContainerState.NEW, RMContainerState.ALLOCATED,
        RMContainerEventType.START, new ContainerStartedTransition())
  // Emits an RMAppAttemptContainerAllocatedEvent for the attempt that owns the
  // container.
  private static final class ContainerStartedTransition extends
      BaseTransition {
    public void transition(RMContainerImpl container, RMContainerEvent event) {
      container.eventHandler.handle(new RMAppAttemptContainerAllocatedEvent(
          container.appAttemptId));
    }
  }
  // Attempt event signalling that a container has been allocated.
  public RMAppAttemptContainerAllocatedEvent(ApplicationAttemptId appAttemptId) {
    // The event type is fixed to RMAppAttemptEventType.CONTAINER_ALLOCATED,
    // addressed to RMAppAttemptImpl.
    super(appAttemptId, RMAppAttemptEventType.CONTAINER_ALLOCATED);
  }

12、RMAppAttemptImpl 处理 CONTAINER_ALLOCATED 事件 && 向 RMContainerImpl发送 ACQUIRED 事件 && 向 RMStateStore 发送 STORE_APP_ATTEMPT 事件

// RMAppAttemptImpl.java
  // CONTAINER_ALLOCATED takes RMAppAttemptImpl from SCHEDULED to either
  // ALLOCATED_SAVING or SCHEDULED; AMContainerAllocatedTransition picks which.
  .addTransition(RMAppAttemptState.SCHEDULED, EnumSet.of(RMAppAttemptState.ALLOCATED_SAVING, RMAppAttemptState.SCHEDULED), RMAppAttemptEventType.CONTAINER_ALLOCATED,
     new AMContainerAllocatedTransition())

// Pulls the AM container from the scheduler, stores the attempt and reports
// ALLOCATED_SAVING as the next state.
// NOTE(review): amContainerAllocation is not used in this excerpt and only the
// ALLOCATED_SAVING return is shown; the code that consumes the allocation and
// the path returning SCHEDULED are presumably elided.
private static final class AMContainerAllocatedTransition
      implements
      MultipleArcTransition<RMAppAttemptImpl, RMAppAttemptEvent, RMAppAttemptState> {
    @Override
    public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
        RMAppAttemptEvent event) {
      // Acquire the AM container from the scheduler.
	  // Fetch the container that belongs to the AM
	  // 1. Before the scheduler returns the resources, it sends the ACQUIRED event to RMContainerImpl
      Allocation amContainerAllocation =
          appAttempt.scheduler.allocate(appAttempt.applicationAttemptId,
            EMPTY_CONTAINER_REQUEST_LIST, EMPTY_CONTAINER_RELEASE_LIST, null,
            null);
      
      // 2. Send the STORE_APP_ATTEMPT event to RMStateStore
      appAttempt.storeAttempt();
      return RMAppAttemptState.ALLOCATED_SAVING;
    }
  }
12.1、Scheduler返回资源之前,向 RMContainerImpl发送 ACQUIRED 事件
// FairScheduler.java
  // Excerpt of FairScheduler#allocate: pulls the newly allocated containers and
  // NM tokens from the application.
  // NOTE(review): the lookup of `application`, the handling of ask/release and
  // blacklist arguments, and the return statement are elided in this excerpt.
  public Allocation allocate(ApplicationAttemptId appAttemptId, List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) {
	...
    ContainersAndNMTokensAllocation allocation = application.pullNewlyAllocatedContainersAndNMTokens();
  }

// SchedulerApplicationAttempt.java
  // Excerpt: delivers an ACQUIRED event to an RMContainer while the newly
  // allocated containers are being pulled.
  // NOTE(review): the enclosing loop that defines rmContainer and the return
  // statement are elided; the extra closing brace presumably belongs to that
  // loop.
  public synchronized ContainersAndNMTokensAllocation
      pullNewlyAllocatedContainersAndNMTokens() {
      ...
      // Send the ACQUIRED event to RMContainerImpl
      rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(),
        RMContainerEventType.ACQUIRED));
    }
  }
12.2、向 RMStateStore 发送 STORE_APP_ATTEMPT 事件
// RMAppAttemptImpl.java
  // Asks the state store obtained from the RM context to store this attempt
  // as a new application attempt.
  private void storeAttempt() {
    // store attempt data in a non-blocking manner to prevent dispatcher
    // thread starvation and wait for state to be saved
    rmContext.getStateStore().storeNewApplicationAttempt(this);
  }
// RMStateStore.java
  // Excerpt: wraps the attempt state in an RMStateStoreAppAttemptEvent and
  // hands it to the store's dispatcher.
  // NOTE(review): the construction of attemptState from `appAttempt` is elided
  // (the `..`).
  public void storeNewApplicationAttempt(RMAppAttempt appAttempt) {
 	..
    dispatcher.getEventHandler().handle(
      new RMStateStoreAppAttemptEvent(attemptState));
  }
  // Event carrying the attempt state to be stored; the event type is fixed to
  // RMStateStoreEventType.STORE_APP_ATTEMPT.
  public RMStateStoreAppAttemptEvent(ApplicationAttemptStateData attemptState) {
    super(RMStateStoreEventType.STORE_APP_ATTEMPT);
    this.attemptState = attemptState;
  }

13、RMContainerImpl 处理 ACQUIRED 事件 && 向 RMAppImpl 发送RMAppEventType.APP_RUNNING_ON_NODE事件

// RMContainerImpl.java
  // ACQUIRED moves RMContainerImpl from ALLOCATED to ACQUIRED and runs
  // AcquiredTransition.
  .addTransition(RMContainerState.ALLOCATED, RMContainerState.ACQUIRED,
     RMContainerEventType.ACQUIRED, new AcquiredTransition())
     
  // Registers the container with the allocation expirer and emits an
  // RMAppRunningOnNodeEvent carrying the application id and the container's
  // node id.
  private static final class AcquiredTransition extends BaseTransition {
    @Override
    public void transition(RMContainerImpl container, RMContainerEvent event) {      
      // Per the original note: ContainerAllocationExpirer monitors whether the
      // AM's resources are actually used; if they stay unused for a long time
      // they have to be released.
	  container.containerAllocationExpirer.register(container.getContainerId());
      // Tell the app
      container.eventHandler.handle(new RMAppRunningOnNodeEvent(container
          .getApplicationAttemptId().getApplicationId(), container.nodeId));
    }
  }

14、RMStateStore 处理 STORE_APP_ATTEMPT 事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.ATTEMPT_NEW_SAVED事件

// RMStateStore.java
  // STORE_APP_ATTEMPT keeps the store in the DEFAULT state and runs
  // StoreAppAttemptTransition.
  .addTransition(RMStateStoreState.DEFAULT, RMStateStoreState.DEFAULT,
          RMStateStoreEventType.STORE_APP_ATTEMPT, new StoreAppAttemptTransition())

  // Persists the attempt state and then notifies the attempt that it has been
  // saved.
  // NOTE(review): attemptState is not declared in this excerpt (presumably
  // extracted from `event`), and the surrounding try/catch and the class's
  // closing brace are not shown.
  private static class StoreAppAttemptTransition implements
      SingleArcTransition<RMStateStore, RMStateStoreEvent> {
    @Override
    public void transition(RMStateStore store, RMStateStoreEvent event) {
        store.storeApplicationAttemptStateInternal(attemptState.getAttemptId(),
            attemptState);
        // Send the ATTEMPT_NEW_SAVED event to RMAppAttemptImpl
        store.notifyApplicationAttempt(new RMAppAttemptEvent
               (attemptState.getAttemptId(),
               RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
  }

15、RMAppAttemptImpl 处理 ATTEMPT_NEW_SAVED 事件 && 向 ApplicationMasterLauncher 发送AMLauncherEventType.LAUNCH事件

// RMAppAttemptImpl.java
  // ATTEMPT_NEW_SAVED moves RMAppAttemptImpl from ALLOCATED_SAVING to ALLOCATED
  // and runs AttemptStoredTransition.
  .addTransition(RMAppAttemptState.ALLOCATED_SAVING, 
          RMAppAttemptState.ALLOCATED,
          RMAppAttemptEventType.ATTEMPT_NEW_SAVED, new AttemptStoredTransition())

  // Once the attempt has been stored, launches it via launchAttempt().
  private static final class AttemptStoredTransition extends BaseTransition {
    @Override
    public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) {
      appAttempt.launchAttempt();
    }
  }
  // Records the current time as the AM launch start time and emits an
  // AMLauncherEvent of type LAUNCH for this attempt.
  private void launchAttempt(){
    launchAMStartTime = System.currentTimeMillis();
    // Send event to launch the AM Container
    // Send the LAUNCH event to ApplicationMasterLauncher
    eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this));
  }

16、ApplicationMasterLauncher 处理 LAUNCH 事件 && AMLauncher与对应的NodeManager通信,启动AM && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.LAUNCHED事件

//ApplicationMasterLauncher.java
  // Excerpt of ApplicationMasterLauncher#handle showing only the LAUNCH case,
  // which calls launch(application).
  // NOTE(review): the switch statement, the derivation of `application` from
  // appEvent and the other cases are not shown in this excerpt.
  public synchronized void  handle(AMLauncherEvent appEvent) {
    case LAUNCH:
      launch(application);
  }
  // Creates a Runnable launcher for the LAUNCH event type and adds it to
  // masterEvents.
  // NOTE(review): what consumes masterEvents is not visible here — presumably
  // a launcher thread; confirm against the full class.
  private void launch(RMAppAttempt application) {
    Runnable launcher = createRunnableLauncher(application, 
        AMLauncherEventType.LAUNCH);
    masterEvents.add(launcher);
  }
  // Builds the Runnable for the given attempt and event type: an AMLauncher
  // constructed from the context, the attempt, the event and the configuration.
  protected Runnable createRunnableLauncher(RMAppAttempt application, 
      AMLauncherEventType event) {
    Runnable launcher =
        new AMLauncher(context, application, event, getConfig());
    return launcher;
  }

//AMLauncher.java
  // Excerpt of AMLauncher#run showing only the LAUNCH case: logs, calls
  // launch() and then emits the LAUNCHED attempt event.
  // NOTE(review): the switch statement, the catch clause and the other cases
  // are not shown in this excerpt.
  public void run() {
    case LAUNCH:
      try {
        LOG.info("Launching master" + application.getAppAttemptId());
        // See launch() in the next snippet (talks to the corresponding
        // NodeManager to start the AM)
        launch();
        // Send the LAUNCHED event to RMAppAttemptImpl
        handler.handle(new RMAppAttemptEvent(application.getAppAttemptId(),
            RMAppAttemptEventType.LAUNCHED));
      } 
  }

AMLauncher#launch 启动AM方法:与对应的NodeManager通信,启动AM

//AMLauncher.java
  // Excerpt of AMLauncher#launch: sends the start-container requests through
  // containerMgrProxy and logs completion for the master container.
  // NOTE(review): the setup of containerMgrProxy, allRequests and
  // masterContainer, and any inspection of `response`, are not shown here.
  private void launch() throws IOException, YarnException {
    StartContainersResponse response =
        containerMgrProxy.startContainers(allRequests);
    LOG.info("Done launching container " + masterContainer + " for AM "
          + application.getAppAttemptId());
  }
//NodeManager.java
  // Excerpt of startContainers: for each start request, derives the container
  // token, token identifier, container id and application id, then returns a
  // StartContainersResponse created with three null arguments.
  // NOTE(review): the values computed in the loop are unused in this excerpt,
  // so the per-container work is presumably elided.
  synchronized public StartContainersResponse startContainers(
      StartContainersRequest requests) 
  throws YarnException {
    for (StartContainerRequest request : requests.getStartContainerRequests()) {
      Token containerToken = request.getContainerToken();
      ContainerTokenIdentifier tokenId = BuilderUtils.newContainerTokenIdentifier(containerToken);
      ContainerId containerID = tokenId.getContainerID();
      ApplicationId applicationId = containerID.getApplicationAttemptId().getApplicationId();
    }
    StartContainersResponse response =
        StartContainersResponse.newInstance(null, null, null);
    return response;
  }

17、RMAppAttemptImpl 处理 LAUNCHED 事件

//RMAppAttemptImpl.java
  // LAUNCHED moves RMAppAttemptImpl from ALLOCATED to LAUNCHED and runs
  // AMLaunchedTransition.
  .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED,
          RMAppAttemptEventType.LAUNCHED, new AMLaunchedTransition())

  // Delegates to the attempt's attemptLaunched() when the AM has been launched.
  private static class AMLaunchedTransition extends BaseTransition {
    @Override
    public void transition(RMAppAttemptImpl appAttempt,
                            RMAppAttemptEvent event) {
      // Register with AMLivelinessMonitor
      appAttempt.attemptLaunched();
    }
  }

18、ApplicationMaster启动后 调用registerApplicationMaster函数 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.REGISTERED事件

//ApplicationMaster.java
  // Excerpt of ApplicationMaster#run: registers the AM through amRMClient,
  // passing its hostname, RPC port and tracking URL.
  // NOTE(review): the rest of the method, including any use of `response`, is
  // not shown in this excerpt.
  public void run() throws YarnException, IOException {
    RegisterApplicationMasterResponse response = amRMClient
        .registerApplicationMaster(appMasterHostname, appMasterRpcPort,
            appMasterTrackingUrl);
  }
//ApplicationMasterService.java
  // Excerpt of ApplicationMasterService#registerApplicationMaster: records a
  // ping for the attempt, looks up the RMApp, dispatches an
  // RMAppAttemptRegistrationEvent with the host, RPC port and tracking URL from
  // the request, writes an audit log entry and returns the response.
  // NOTE(review): applicationAttemptId, appID and response are not declared in
  // this excerpt, and the extra closing brace presumably belongs to an elided
  // enclosing block.
  public RegisterApplicationMasterResponse registerApplicationMaster(
      RegisterApplicationMasterRequest request) throws YarnException, IOException {

      this.amLivelinessMonitor.receivedPing(applicationAttemptId);
      RMApp app = this.rmContext.getRMApps().get(appID);      
      // Send the REGISTERED event to RMAppAttemptImpl
      this.rmContext
        .getDispatcher()
        .getEventHandler()
        .handle(
          new RMAppAttemptRegistrationEvent(applicationAttemptId, request
            .getHost(), request.getRpcPort(), request.getTrackingUrl()));
      RMAuditLogger.logSuccess(app.getUser(), AuditConstants.REGISTER_AM,
        "ApplicationMasterService", appID, applicationAttemptId);
      return response;
    }
  }
  
  // Attempt event of type RMAppAttemptEventType.REGISTERED that stores the
  // attempt id together with the AM's host, RPC port and tracking URL.
  public RMAppAttemptRegistrationEvent(ApplicationAttemptId appAttemptId,
      String host, int rpcPort, String trackingUrl) {
    super(appAttemptId, RMAppAttemptEventType.REGISTERED);
    this.appAttemptId = appAttemptId;
    this.host = host;
    this.rpcport = rpcPort;
    this.trackingurl = trackingUrl;
  }

19、RMAppAttemptImpl 处理 REGISTERED 事件 && 向 RMAppImpl 发送RMAppEventType.ATTEMPT_REGISTERED事件

//RMAppAttemptImpl.java
  // REGISTERED moves RMAppAttemptImpl from LAUNCHED to RUNNING and runs
  // AMRegisteredTransition.
  .addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING,
          RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition())

  // Emits an ATTEMPT_REGISTERED application event for the application that owns
  // this attempt.
  // NOTE(review): only the notification is shown; any other work done by the
  // real transition is presumably elided.
  private static final class AMRegisteredTransition extends BaseTransition {
    @Override
    public void transition(RMAppAttemptImpl appAttempt,
        RMAppAttemptEvent event) {
      // Let the app know
      // Send the ATTEMPT_REGISTERED event to RMAppImpl
      appAttempt.eventHandler.handle(new RMAppEvent(appAttempt
          .getAppAttemptId().getApplicationId(),
          RMAppEventType.ATTEMPT_REGISTERED));
    }
  }

RMAppImpl 收到ATTEMPT_REGISTERED事件后 没有后续处理,只是更改了状态 ACCEPTED -> RUNNING

参考文章
https://blog.csdn.net/weixin_42642341/article/details/81636135
https://blog.csdn.net/liushahe2012/article/details/56064378
董西成 Hadoop技术内幕

发布了53 篇原创文章 · 获赞 50 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/xw514124202/article/details/103113811
今日推荐