一、流程图
基于Hadoop2.6版本
二、状态转化
- RMAppImpl 状态转化
- NEW -> NEW_SAVING (STEP 1)
- NEW_SAVING -> SUBMITTED (STEP 3)
- SUBMITTED -> ACCEPTED (STEP 5)
- ACCEPTED -> RUNNING (STEP 19)
- RMAppAttemptImpl 状态转化
- NEW -> SUBMITTED (STEP 6)
- SUBMITTED -> SCHEDULED (STEP 8)
- SCHEDULED -> ALLOCATED_SAVING (STEP 11)
- ALLOCATED_SAVING -> ALLOCATED (STEP 14)
- ALLOCATED -> LAUNCHED (STEP 16)
- LAUNCHED -> RUNNING (STEP 18)
- RMContainerImpl 状态转化
- NEW -> ALLOCATED (STEP 10)
- ALLOCATED -> ACQUIRED (STEP 12)
- ACQUIRED -> RUNNING (STEP 15)
三、代码流程
1、ClientRMService#submitApplication && 向 RMAppImpl 发送RMAppEventType.START事件
// ClientRMService.java
public SubmitApplicationResponse submitApplication(
SubmitApplicationRequest request) throws YarnException {
try {
// call RMAppManager to submit application directly
rmAppManager.submitApplication(submissionContext,
System.currentTimeMillis(), user);
LOG.info("Application with id " + applicationId.getId() +
" submitted by user " + user);
}
}
// RMAppManager.java
protected void submitApplication(
ApplicationSubmissionContext submissionContext, long submitTime,
String user) throws YarnException {
RMAppImpl application =
createAndPopulateNewRMApp(submissionContext, submitTime, user, false);
try {
if (UserGroupInformation.isSecurityEnabled()) {
...
} else {
// 向RMAppImpl 发送 START事件
this.rmContext.getDispatcher().getEventHandler()
.handle(new RMAppEvent(applicationId, RMAppEventType.START));
}
}
}
2、RMAppImpl 处理 START 事件 && 向 RMStateStore 发送RMStateStoreEventType.STORE_APP事件
// RMAppImpl.java
// 收到START事件,调用RMAppNewlySavingTransition函数,并且RMAppImpl状态由NEW转化成NEW_SAVING。
.addTransition(RMAppState.NEW, RMAppState.NEW_SAVING,
RMAppEventType.START, new RMAppNewlySavingTransition())
// RMAppNewlySavingTransition: asks the RMStateStore to save the RMAppImpl's current information.
// It is the transition registered for the START event (NEW -> NEW_SAVING).
private static final class RMAppNewlySavingTransition extends RMAppTransition {
public void transition(RMAppImpl app, RMAppEvent event) {
LOG.info("Storing application with id " + app.applicationId);
// Hand the application to the state store obtained from the RM context.
app.rmContext.getStateStore().storeNewApplication(app);
}
}
// RMStateStore.java
public void storeNewApplication(RMApp app) {
...
dispatcher.getEventHandler().handle(new RMStateStoreAppEvent(appState));
}
// RMStateStoreAppEvent.java
// Builds the state-store event that carries an application's state data.
public RMStateStoreAppEvent(ApplicationStateData appState) {
// The event type is fixed to STORE_APP: this is the event sent to RMStateStore
// when RMAppImpl asks for a new application to be stored.
super(RMStateStoreEventType.STORE_APP);
this.appState = appState;
}
3、RMStateStore 处理 STORE_APP 事件 && 向 RMAppImpl 发送RMAppEventType.APP_NEW_SAVED事件
// RMStateStore.java
.addTransition(RMStateStoreState.DEFAULT, RMStateStoreState.DEFAULT,
RMStateStoreEventType.STORE_APP, new StoreAppTransition())
// Transition RMStateStore runs for STORE_APP: store the application, then notify RMAppImpl.
// NOTE(review): appId and appState are not declared in this excerpt; presumably they are
// derived from the event in code that was elided.
private static class StoreAppTransition
implements SingleArcTransition<RMStateStore, RMStateStoreEvent> {
@Override
public void transition(RMStateStore store, RMStateStoreEvent event) {
LOG.info("Storing info for app: " + appId);
try {
store.storeApplicationStateInternal(appId, appState);
// Send the APP_NEW_SAVED event to RMAppImpl after the store call returns.
store.notifyApplication(new RMAppEvent(appId,
RMAppEventType.APP_NEW_SAVED));
} catch (Exception e) {
// A failure is logged and reported via notifyStoreOperationFailed; no APP_NEW_SAVED is sent.
LOG.error("Error storing app: " + appId, e);
store.notifyStoreOperationFailed(e);
}
};
}
4、RMAppImpl 处理 APP_NEW_SAVED 事件 && 向 ResourceManager 发送SchedulerEventType.APP_ADDED事件
// RMAppImpl.java
.addTransition(RMAppState.NEW_SAVING, RMAppState.SUBMITTED,
RMAppEventType.APP_NEW_SAVED, new AddApplicationToSchedulerTransition())
private static final class AddApplicationToSchedulerTransition extends
RMAppTransition {
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
app.submissionContext.getQueue(), app.user,
app.submissionContext.getReservationID()));
}
}
// Scheduler event announcing a newly added application.
// NOTE(review): AddApplicationToSchedulerTransition passes four arguments (no isAppRecovering),
// so a four-argument overload presumably exists outside this excerpt; confirm against the source.
public AppAddedSchedulerEvent(ApplicationId applicationId, String queue,
String user, boolean isAppRecovering, ReservationId reservationID) {
// The event type is fixed to APP_ADDED, the event the scheduler handles in the next step.
super(SchedulerEventType.APP_ADDED);
this.applicationId = applicationId;
this.queue = queue;
this.user = user;
this.reservationID = reservationID;
this.isAppRecovering = isAppRecovering;
}
5、ResourceManager(调度器,此处为 FairScheduler)处理 APP_ADDED 事件 && 向 RMAppImpl 发送RMAppEventType.APP_ACCEPTED事件
// FairScheduler.java
public void handle(SchedulerEvent event) {
..
case APP_ADDED:
if (!(event instanceof AppAddedSchedulerEvent)) {
throw new RuntimeException("Unexpected event type: " + event);
}
AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
// 进入 addApplication函数
addApplication(appAddedEvent.getApplicationId(),
appAddedEvent.getQueue(), appAddedEvent.getUser(),
appAddedEvent.getIsAppRecovering());
break;
}
// Scheduler-side handling of APP_ADDED; called from handle() with the fields of the event.
// The code before the branch and the recovery branch are elided in this excerpt.
private synchronized void addApplication(ApplicationId applicationId,
String queueName, String user, boolean isAppRecovering) {
...
if (isAppRecovering) {
..
} else {
// Not recovering: send the APP_ACCEPTED event to RMAppImpl through the RM dispatcher.
rmContext.getDispatcher().getEventHandler()
.handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
}
}
6、RMAppImpl 处理 APP_ACCEPTED 事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.START事件
// RMAppImpl.java
.addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED,
RMAppEventType.APP_ACCEPTED, new StartAppAttemptTransition())
private static final class StartAppAttemptTransition extends RMAppTransition {
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
app.createAndStartNewAttempt(false);
};
}
private void
createAndStartNewAttempt(boolean transferStateFromPreviousAttempt) {
createNewAttempt();
handler.handle(new RMAppStartAttemptEvent(currentAttempt.getAppAttemptId(),
transferStateFromPreviousAttempt));
}
public RMAppStartAttemptEvent(ApplicationAttemptId appAttemptId,
boolean transferStateFromPreviousAttempt) {
// 向 RMAppAttemptImpl 发送 START 事件
super(appAttemptId, RMAppAttemptEventType.START);
this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt;
}
7、RMAppAttemptImpl 处理 START事件 && 向 ResourceManager 发送SchedulerEventType.APP_ATTEMPT_ADDED事件
// RMAppAttemptImpl.java
.addTransition(RMAppAttemptState.NEW, RMAppAttemptState.SUBMITTED,RMAppAttemptEventType.START,
new AttemptStartedTransition())
private static final class AttemptStartedTransition extends BaseTransition {
@Override
public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
// Register with the ApplicationMasterService
appAttempt.masterService
.registerAppAttempt(appAttempt.applicationAttemptId);
// Add the applicationAttempt to the scheduler and inform the scheduler
// whether to transfer the state from previous attempt.
appAttempt.eventHandler.handle(new AppAttemptAddedSchedulerEvent(
appAttempt.applicationAttemptId, transferStateFromPreviousAttempt));
}
}
// Scheduler event announcing a newly added application attempt.
// NOTE(review): AttemptStartedTransition passes only two arguments (no isAttemptRecovering),
// so a two-argument overload presumably exists outside this excerpt; confirm against the source.
public AppAttemptAddedSchedulerEvent(
ApplicationAttemptId applicationAttemptId,
boolean transferStateFromPreviousAttempt,
boolean isAttemptRecovering) {
// The event type is fixed to APP_ATTEMPT_ADDED, the event the scheduler handles in the next step.
super(SchedulerEventType.APP_ATTEMPT_ADDED);
this.applicationAttemptId = applicationAttemptId;
this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt;
this.isAttemptRecovering = isAttemptRecovering;
}
8、ResourceManager(调度器,此处为 FairScheduler)处理 APP_ATTEMPT_ADDED 事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.ATTEMPT_ADDED事件
// FairScheduler.java
public void handle(SchedulerEvent event) {
..
case APP_ATTEMPT_ADDED:
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
appAttemptAddedEvent.getIsAttemptRecovering());
}
// Scheduler-side handling of APP_ATTEMPT_ADDED; called from handle() with the fields of the event.
// The recovery branch is empty in this excerpt; transferStateFromPreviousAttempt is not used
// in the lines shown.
protected synchronized void addApplicationAttempt(
ApplicationAttemptId applicationAttemptId,
boolean transferStateFromPreviousAttempt,
boolean isAttemptRecovering) {
if (isAttemptRecovering) {
} else {
// Not recovering: send the ATTEMPT_ADDED event to RMAppAttemptImpl through the RM dispatcher.
rmContext.getDispatcher().getEventHandler().handle(
new RMAppAttemptEvent(applicationAttemptId,
RMAppAttemptEventType.ATTEMPT_ADDED));
}
}
9、RMAppAttemptImpl 处理 ATTEMPT_ADDED 事件 && appAttempt.scheduler.allocate
// RMAppAttemptImpl.java
.addTransition(RMAppAttemptState.SUBMITTED,
EnumSet.of(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING,
RMAppAttemptState.SCHEDULED), RMAppAttemptEventType.ATTEMPT_ADDED,
new ScheduleTransition())
// Transition run when RMAppAttemptImpl receives ATTEMPT_ADDED in the SUBMITTED state.
// The registration allows LAUNCHED_UNMANAGED_SAVING or SCHEDULED as the result;
// this excerpt only shows the path that returns SCHEDULED.
public static final class ScheduleTransition
implements
MultipleArcTransition<RMAppAttemptImpl, RMAppAttemptEvent, RMAppAttemptState> {
@Override
public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
// getUnmanagedAM() tells whether the AM is unmanaged. Per the original author's note, when it
// is true the RM does not allocate a container for the AM and launch it; the default is false.
// NOTE(review): that semantics is not visible in this excerpt; confirm against the API docs.
ApplicationSubmissionContext subCtx = appAttempt.submissionContext;
if (!subCtx.getUnmanagedAM()) {
// AM resource has been checked when submission
// Ask the scheduler to allocate resources for the single AM request (amReq).
// NOTE(review): amBlacklist is not declared in this excerpt.
Allocation amContainerAllocation =
appAttempt.scheduler.allocate(
appAttempt.applicationAttemptId,
Collections.singletonList(appAttempt.amReq),
EMPTY_CONTAINER_RELEASE_LIST,
amBlacklist.getAdditions(),
amBlacklist.getRemovals());
}
// Return the SCHEDULED state: the value is the attempt's next state, not an event.
return RMAppAttemptState.SCHEDULED;
}
}
10、从 RMAppAttemptImpl的 allocate分配AM资源 && 向RMContainerImpl发送START事件
对于 FairScheduler 调度器,处理过程简单描述如下:
- 1、收到 NODE_UPDATE 事件
- 2、FairScheduler#nodeUpdate -> attemptScheduling
  Resource assignment = queueMgr.getRootQueue().assignContainer(node)
- 3、FSParentQueue#assignContainer
  assigned = child.assignContainer(node);
- 4、FSLeafQueue#assignContainer
  assigned = sched.assignContainer(node);
- 5、FSAppAttempt#assignContainer
  RMContainer allocatedContainer = allocate(type, node, request.getPriority(), request, container);
- 6、FSAppAttempt#allocate
  rmContainer.handle(new RMContainerEvent(container.getId(), RMContainerEventType.START));
11、RMContainerImpl 处理 START 事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.CONTAINER_ALLOCATED事件
// RMContainerImpl.java
.addTransition(RMContainerState.NEW, RMContainerState.ALLOCATED,
RMContainerEventType.START, new ContainerStartedTransition())
private static final class ContainerStartedTransition extends
BaseTransition {
public void transition(RMContainerImpl container, RMContainerEvent event) {
container.eventHandler.handle(new RMAppAttemptContainerAllocatedEvent(
container.appAttemptId));
}
}
public RMAppAttemptContainerAllocatedEvent(ApplicationAttemptId appAttemptId) {
// 向 RMAppAttemptImpl 发送 CONTAINER_ALLOCATED 事件
super(appAttemptId, RMAppAttemptEventType.CONTAINER_ALLOCATED);
}
12、RMAppAttemptImpl 处理 CONTAINER_ALLOCATED 事件 && 向 RMContainerImpl发送 ACQUIRED 事件 && 向 RMStateStore 发送 STORE_APP_ATTEMPT 事件
// RMAppAttemptImpl.java
.addTransition(RMAppAttemptState.SCHEDULED, EnumSet.of(RMAppAttemptState.ALLOCATED_SAVING, RMAppAttemptState.SCHEDULED), RMAppAttemptEventType.CONTAINER_ALLOCATED,
new AMContainerAllocatedTransition())
// Transition run when RMAppAttemptImpl receives CONTAINER_ALLOCATED in the SCHEDULED state.
// The registration allows ALLOCATED_SAVING or SCHEDULED as the result;
// this excerpt only shows the path that returns ALLOCATED_SAVING.
private static final class AMContainerAllocatedTransition
implements
MultipleArcTransition<RMAppAttemptImpl, RMAppAttemptEvent, RMAppAttemptState> {
@Override
public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
// Acquire the AM container from the scheduler.
// Fetch the container allocated for the AM; the call passes the empty request and release lists.
// 1. Before the scheduler returns the resources, the ACQUIRED event is sent to RMContainerImpl
//    (see pullNewlyAllocatedContainersAndNMTokens in section 12.1).
Allocation amContainerAllocation =
appAttempt.scheduler.allocate(appAttempt.applicationAttemptId,
EMPTY_CONTAINER_REQUEST_LIST, EMPTY_CONTAINER_RELEASE_LIST, null,
null);
// 2. Send the STORE_APP_ATTEMPT event to RMStateStore (via storeAttempt, section 12.2).
appAttempt.storeAttempt();
return RMAppAttemptState.ALLOCATED_SAVING;
}
}
12.1、Scheduler返回资源之前,向 RMContainerImpl发送 ACQUIRED 事件
// FairScheduler.java
public Allocation allocate(ApplicationAttemptId appAttemptId, List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) {
...
ContainersAndNMTokensAllocation allocation = application.pullNewlyAllocatedContainersAndNMTokens();
}
// SchedulerApplicationAttempt.java
public synchronized ContainersAndNMTokensAllocation
pullNewlyAllocatedContainersAndNMTokens() {
...
// 向RMContainerImpl 发送 ACQUIRED 事件
rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(),
RMContainerEventType.ACQUIRED));
}
}
12.2、向 RMStateStore 发送 STORE_APP_ATTEMPT 事件
// RMAppAttemptImpl.java
private void storeAttempt() {
// store attempt data in a non-blocking manner to prevent dispatcher
// thread starvation and wait for state to be saved
rmContext.getStateStore().storeNewApplicationAttempt(this);
}
// RMStateStore.java
public void storeNewApplicationAttempt(RMAppAttempt appAttempt) {
..
dispatcher.getEventHandler().handle(
new RMStateStoreAppAttemptEvent(attemptState));
}
public RMStateStoreAppAttemptEvent(ApplicationAttemptStateData attemptState) {
super(RMStateStoreEventType.STORE_APP_ATTEMPT);
this.attemptState = attemptState;
}
13、RMContainerImpl 处理 ACQUIRED 事件 && 向 RMAppImpl 发送RMAppEventType.APP_RUNNING_ON_NODE事件
// RMContainerImpl.java
.addTransition(RMContainerState.ALLOCATED, RMContainerState.ACQUIRED,
RMContainerEventType.ACQUIRED, new AcquiredTransition())
// Transition run when RMContainerImpl receives ACQUIRED (ALLOCATED -> ACQUIRED).
private static final class AcquiredTransition extends BaseTransition {
@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
// Register the container with the ContainerAllocationExpirer. Per the original author's note,
// the expirer monitors whether the allocated resource is actually used and releases it if it
// stays unused for too long.
// NOTE(review): that behavior is not visible in this excerpt.
container.containerAllocationExpirer.register(container.getContainerId());
// Tell the app
// RMAppRunningOnNodeEvent is built from the application id and the container's node id.
container.eventHandler.handle(new RMAppRunningOnNodeEvent(container
.getApplicationAttemptId().getApplicationId(), container.nodeId));
}
}
14、RMStateStore 处理 STORE_APP_ATTEMPT 事件 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.ATTEMPT_NEW_SAVED事件
// RMStateStore.java
.addTransition(RMStateStoreState.DEFAULT, RMStateStoreState.DEFAULT,
RMStateStoreEventType.STORE_APP_ATTEMPT, new StoreAppAttemptTransition())
private static class StoreAppAttemptTransition implements
SingleArcTransition<RMStateStore, RMStateStoreEvent> {
@Override
public void transition(RMStateStore store, RMStateStoreEvent event) {
store.storeApplicationAttemptStateInternal(attemptState.getAttemptId(),
attemptState);
// 向 RMAppAttemptImpl 发送 ATTEMPT_NEW_SAVED 事件
store.notifyApplicationAttempt(new RMAppAttemptEvent
(attemptState.getAttemptId(),
RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
}
15、RMAppAttemptImpl 处理 ATTEMPT_NEW_SAVED 事件 && 向 ApplicationMasterLauncher 发送AMLauncherEventType.LAUNCH事件
// RMAppAttemptImpl.java
.addTransition(RMAppAttemptState.ALLOCATED_SAVING,
RMAppAttemptState.ALLOCATED,
RMAppAttemptEventType.ATTEMPT_NEW_SAVED, new AttemptStoredTransition())
private static final class AttemptStoredTransition extends BaseTransition {
@Override
public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) {
appAttempt.launchAttempt();
}
}
// Called by AttemptStoredTransition: records the launch start time and requests the AM launch.
private void launchAttempt(){
launchAMStartTime = System.currentTimeMillis();
// Send event to launch the AM Container
// The LAUNCH event is sent to ApplicationMasterLauncher, with this attempt as its payload.
eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this));
}
16、ApplicationMasterLauncher 处理 LAUNCH 事件 && AMLauncher 与对应的 NodeManager 通信,启动AM && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.LAUNCHED事件
//ApplicationMasterLauncher.java
public synchronized void handle(AMLauncherEvent appEvent) {
case LAUNCH:
launch(application);
}
private void launch(RMAppAttempt application) {
Runnable launcher = createRunnableLauncher(application,
AMLauncherEventType.LAUNCH);
masterEvents.add(launcher);
}
protected Runnable createRunnableLauncher(RMAppAttempt application,
AMLauncherEventType event) {
Runnable launcher =
new AMLauncher(context, application, event, getConfig());
return launcher;
}
//AMLauncher.java
public void run() {
case LAUNCH:
try {
LOG.info("Launching master" + application.getAppAttemptId());
// launch 代码详见下 (与对应的NodeManager通信,启动AM)
launch();
// 向 RMAppAttemptImpl 发送 LAUNCHED 事件
handler.handle(new RMAppAttemptEvent(application.getAppAttemptId(),
RMAppAttemptEventType.LAUNCHED));
}
}
AMLauncher#launch 启动AM方法:与对应的NodeManager通信,启动AM
//AMLauncher.java
private void launch() throws IOException, YarnException {
StartContainersResponse response =
containerMgrProxy.startContainers(allRequests);
LOG.info("Done launching container " + masterContainer + " for AM "
+ application.getAppAttemptId());
}
//NodeManager.java
synchronized public StartContainersResponse startContainers(
StartContainersRequest requests)
throws YarnException {
for (StartContainerRequest request : requests.getStartContainerRequests()) {
Token containerToken = request.getContainerToken();
ContainerTokenIdentifier tokenId = BuilderUtils.newContainerTokenIdentifier(containerToken);
ContainerId containerID = tokenId.getContainerID();
ApplicationId applicationId = containerID.getApplicationAttemptId().getApplicationId();
}
StartContainersResponse response =
StartContainersResponse.newInstance(null, null, null);
return response;
}
17、RMAppAttemptImpl 处理 LAUNCHED 事件
//RMAppAttemptImpl.java
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED,
RMAppAttemptEventType.LAUNCHED, new AMLaunchedTransition())
private static class AMLaunchedTransition extends BaseTransition {
@Override
public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
// Register with AMLivelinessMonitor
appAttempt.attemptLaunched();
}
}
18、ApplicationMaster启动后 调用registerApplicationMaster函数 && 向 RMAppAttemptImpl 发送RMAppAttemptEventType.REGISTERED事件
//ApplicationMaster.java
public void run() throws YarnException, IOException {
RegisterApplicationMasterResponse response = amRMClient
.registerApplicationMaster(appMasterHostname, appMasterRpcPort,
appMasterTrackingUrl);
}
//ApplicationMasterService.java
public RegisterApplicationMasterResponse registerApplicationMaster(
RegisterApplicationMasterRequest request) throws YarnException, IOException {
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
RMApp app = this.rmContext.getRMApps().get(appID);
// 向 RMAppAttemptImpl 发送 REGISTERED 事件
this.rmContext
.getDispatcher()
.getEventHandler()
.handle(
new RMAppAttemptRegistrationEvent(applicationAttemptId, request
.getHost(), request.getRpcPort(), request.getTrackingUrl()));
RMAuditLogger.logSuccess(app.getUser(), AuditConstants.REGISTER_AM,
"ApplicationMasterService", appID, applicationAttemptId);
return response;
}
}
public RMAppAttemptRegistrationEvent(ApplicationAttemptId appAttemptId,
String host, int rpcPort, String trackingUrl) {
super(appAttemptId, RMAppAttemptEventType.REGISTERED);
this.appAttemptId = appAttemptId;
this.host = host;
this.rpcport = rpcPort;
this.trackingurl = trackingUrl;
}
19、RMAppAttemptImpl 处理 REGISTERED 事件 && 向 RMAppImpl 发送RMAppEventType.ATTEMPT_REGISTERED事件
//RMAppAttemptImpl.java
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING,
RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition())
// Transition run when RMAppAttemptImpl receives REGISTERED (LAUNCHED -> RUNNING).
private static final class AMRegisteredTransition extends BaseTransition {
@Override
public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
// Let the app know
// Send the ATTEMPT_REGISTERED event to RMAppImpl, addressed by the attempt's application id.
appAttempt.eventHandler.handle(new RMAppEvent(appAttempt
.getAppAttemptId().getApplicationId(),
RMAppEventType.ATTEMPT_REGISTERED));
}
}
RMAppImpl 收到 ATTEMPT_REGISTERED 事件后没有后续处理,只是将状态由 ACCEPTED 更改为 RUNNING。
参考文章
https://blog.csdn.net/weixin_42642341/article/details/81636135
https://blog.csdn.net/liushahe2012/article/details/56064378
董西成 Hadoop技术内幕