Spark Master startup and the Master's message loop

1 Analysis of the start-master.sh script

Running start-master.sh calls spark-daemon.sh, which in turn calls spark-class to launch the JVM.
## start-master.sh
# Export the SPARK_HOME environment variable
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
# The main class to launch
CLASS="org.apache.spark.deploy.master.Master"

ORIGINAL_ARGS="$@"
. "${SPARK_HOME}/sbin/spark-config.sh"
. "${SPARK_HOME}/bin/load-spark-env.sh"
# The default master RPC port is 7077
if [ "$SPARK_MASTER_PORT" = "" ]; then
  SPARK_MASTER_PORT=7077
fi
# Determine the host name
if [ "$SPARK_MASTER_HOST" = "" ]; then
  case `uname` in
      (SunOS)
      SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`"
      ;;
      (*)
      SPARK_MASTER_HOST="`hostname -f`"
      ;;
  esac
fi

if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
  SPARK_MASTER_WEBUI_PORT=8080
fi
# Finally delegate to spark-daemon.sh
"${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS 1 \
  --host $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \
  $ORIGINAL_ARGS
## spark-daemon.sh

if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
. "${SPARK_HOME}/sbin/spark-config.sh"
# start
option=$1
shift
# org.apache.spark.deploy.master.Master
command=$1
shift
# 1
instance=$1
shift

spark_rotate_log ()
{
    log=$1;
    num=5;
    if [ -n "$2" ]; then
    num=$2
    fi
    if [ -f "$log" ]; then # rotate logs
    while [ $num -gt 1 ]; do
        prev=`expr $num - 1`
        [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
        num=$prev
    done
    mv "$log" "$log.$num";
    fi
}

. "${SPARK_HOME}/bin/load-spark-env.sh"

if [ "$SPARK_IDENT_STRING" = "" ]; then
  export SPARK_IDENT_STRING="$USER"
fi


export SPARK_PRINT_LAUNCH_COMMAND="1"

# get log directory
if [ "$SPARK_LOG_DIR" = "" ]; then
  export SPARK_LOG_DIR="${SPARK_HOME}/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
  rm -f "$SPARK_LOG_DIR"/.spark_test
else
  chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
fi

if [ "$SPARK_PID_DIR" = "" ]; then
  SPARK_PID_DIR=/tmp
fi

# some variables
log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid"

# Set default scheduling priority
if [ "$SPARK_NICENESS" = "" ]; then
    export SPARK_NICENESS=0
fi

# Start the daemon in the background, record its pid, and check that the java process actually came up
execute_command() {
  if [ -z ${SPARK_NO_DAEMONIZE+set} ]; then
      nohup -- "$@" >> $log 2>&1 < /dev/null &
      newpid="$!"

      echo "$newpid" > "$pid"

      # Poll for up to 5 seconds for the java process to start
      for i in {1..10}
      do
        if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
           break
        fi
        sleep 0.5
      done

      sleep 2
      # Check if the process has died; in that case we'll tail the log so the user can see
      if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
        echo "failed to launch: $@"
        tail -10 "$log" | sed 's/^/  /'
        echo "full log in $log"
      fi
  else
      "$@"
  fi
}

run_command() {
  mode="$1"
  shift

  mkdir -p "$SPARK_PID_DIR"

  if [ -f "$pid" ]; then
    TARGET_ID="$(cat "$pid")"
    if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
      echo "$command running as process $TARGET_ID.  Stop it first."
      exit 1
    fi
  fi

  if [ "$SPARK_MASTER" != "" ]; then
    echo rsync from "$SPARK_MASTER"
    rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
  fi

  spark_rotate_log "$log"
  echo "starting $command, logging to $log"

  case "$mode" in
    (class)
      execute_command nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class "$command" "$@"
      ;;

    (submit)
      execute_command nice -n "$SPARK_NICENESS" bash "${SPARK_HOME}"/bin/spark-submit --class "$command" "$@"
      ;;

    (*)
      echo "unknown mode: $mode"
      exit 1
      ;;
  esac

}

case $option in

  (submit)
    run_command submit "$@"
    ;;

  (start)
  ###
  ###
    run_command class "$@"
    ;;

  (stop)

    if [ -f $pid ]; then
      TARGET_ID="$(cat "$pid")"
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo "stopping $command"
        kill "$TARGET_ID" && rm -f "$pid"
      else
        echo "no $command to stop"
      fi
    else
      echo "no $command to stop"
    fi
    ;;

  (status)

    if [ -f $pid ]; then
      TARGET_ID="$(cat "$pid")"
      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
        echo $command is running.
        exit 0
      else
        echo $pid file is present but $command not running
        exit 1
      fi
    else
      echo $command not running.
      exit 2
    fi
    ;;

  (*)
    echo $usage
    exit 1
    ;;

esac

start-master.sh --> spark-daemon.sh start org.apache.spark.deploy.master.Master 1 ... --> spark-class org.apache.spark.deploy.master.Master ...
2. spark-class then runs java with the launcher's Main class, which builds the final command line that actually starts the Master
java ... org.apache.spark.launcher.Main org.apache.spark.deploy.master.Master ...

// The entry point: the org.apache.spark.deploy.master.Master companion object
private[deploy] object Master extends Logging {
  val SYSTEM_NAME = "sparkMaster"
  val ENDPOINT_NAME = "Master"

  def main(argStrings: Array[String]) {
    Thread.setDefaultUncaughtExceptionHandler(new SparkUncaughtExceptionHandler(
      exitOnUncaughtException = false))
    Utils.initDaemon(log)
    val conf = new SparkConf
    val args = new MasterArguments(argStrings, conf)
    val (rpcEnv, _, _) = startRpcEnvAndEndpoint(args.host, args.port, args.webUiPort, conf)
    rpcEnv.awaitTermination()
  }

  /**
   * Start the Master and return a three tuple of:
   *   (1) The Master RpcEnv
   *   (2) The web UI bound port
   *   (3) The REST server bound port, if any
   */

/*
 - Create the RPC environment
 - Create the Master endpoint and register it with the RPC environment
*/
  def startRpcEnvAndEndpoint(
      host: String,
      port: Int,
      webUiPort: Int,
      conf: SparkConf): (RpcEnv, Int, Option[Int]) = {
    val securityMgr = new SecurityManager(conf)
    val rpcEnv = RpcEnv.create(SYSTEM_NAME, host, port, conf, securityMgr)
    val masterEndpoint = rpcEnv.setupEndpoint(ENDPOINT_NAME,
      new Master(rpcEnv, rpcEnv.address, webUiPort, securityMgr, conf))
    val portsResponse = masterEndpoint.askSync[BoundPortsResponse](BoundPortsRequest)
    (rpcEnv, portsResponse.webUIPort, portsResponse.restPort)
  }
}

While constructing the new Master, the class:
- 1. creates the data structures that hold worker, application and driver information
- 2. reads its configuration
- 3. in onStart(), starts the web UI, schedules the periodic CheckForWorkerTimeOut message and sets up the HA/recovery machinery

2 Class hierarchy of Master

// Master must extend an RPC endpoint and LeaderElectable (the hook for the HA mechanism)
// Lifecycle of an RPC endpoint: constructor -> onStart -> receive* -> onStop
private[deploy] class Master(
    override val rpcEnv: RpcEnv,
    address: RpcAddress,
    webUiPort: Int,
    val securityMgr: SecurityManager,
    val conf: SparkConf)
  extends ThreadSafeRpcEndpoint with Logging with LeaderElectable 
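
To make this lifecycle concrete, here is a minimal sketch of a custom endpoint that follows it. It is illustrative only: RpcEndpoint and ThreadSafeRpcEndpoint are private[spark] APIs (so this would have to live under the org.apache.spark package), and EchoEndpoint with its string messages is made up for the example, not part of the Master.

import org.apache.spark.internal.Logging
import org.apache.spark.rpc.{RpcCallContext, RpcEnv, ThreadSafeRpcEndpoint}

// Illustrative endpoint only; not part of the Spark code base.
private[spark] class EchoEndpoint(override val rpcEnv: RpcEnv)
  extends ThreadSafeRpcEndpoint with Logging {

  // Called once after the constructor, before any message is delivered
  override def onStart(): Unit = logInfo("EchoEndpoint started")

  // Handles fire-and-forget messages sent with RpcEndpointRef.send(...)
  override def receive: PartialFunction[Any, Unit] = {
    case msg: String => logInfo(s"received: $msg")
  }

  // Handles ask-style messages sent with RpcEndpointRef.ask / askSync
  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
    case msg: String => context.reply(msg.reverse)
  }

  // Called when the endpoint is being stopped
  override def onStop(): Unit = logInfo("EchoEndpoint stopped")
}

The Master overrides exactly these hooks: its onStart() is analysed in section 5, and its receive/receiveAndReply cases make up the message loop in section 6.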

3 Data structures holding worker, application and driver information

/*
1. Worker information
2. Application information
3. Driver information
*/

val idToApp = new HashMap[String, ApplicationInfo]
private val waitingApps = new ArrayBuffer[ApplicationInfo]
val apps = new HashSet[ApplicationInfo]
private val endpointToApp = new HashMap[RpcEndpointRef, ApplicationInfo]
private val addressToApp = new HashMap[RpcAddress, ApplicationInfo]
private val completedApps = new ArrayBuffer[ApplicationInfo]
private var nextAppNumber = 0

val workers = new HashSet[WorkerInfo]
private val idToWorker = new HashMap[String, WorkerInfo]
private val addressToWorker = new HashMap[RpcAddress, WorkerInfo]

private val drivers = new HashSet[DriverInfo]
private val completedDrivers = new ArrayBuffer[DriverInfo]
private val waitingDrivers = new ArrayBuffer[DriverInfo]

4 Reading the configuration

// When assigning executors, the default (true) spreads them round-robin across the workers;
// set it to false to fill up one worker before moving on to the next
private val spreadOutApps = conf.getBoolean("spark.deploy.spreadOut", true)
// If an application does not specify how many cores it wants, it may take all available cores
private val defaultCores = conf.getInt("spark.deploy.defaultCores", Int.MaxValue)
val reverseProxy = conf.getBoolean("spark.ui.reverseProxy", false)
if (defaultCores < 1) {
    throw new SparkException("spark.deploy.defaultCores must be positive")
  }
private val WORKER_TIMEOUT_MS = conf.getLong("spark.worker.timeout", 60) * 1000
private val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200)
private val RETAINED_DRIVERS = conf.getInt("spark.deploy.retainedDrivers", 200)
private val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15)
private val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE")
private val MAX_EXECUTOR_RETRIES = conf.getInt("spark.deploy.maxExecutorRetries", 10)
// Alternative application submission gateway that is stable across Spark versions
private val restServerEnabled = conf.getBoolean("spark.master.rest.enabled", true)
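
These values come from the Master's own SparkConf, so on a real cluster they are supplied to the Master process (for example in conf/spark-defaults.conf or through SPARK_MASTER_OPTS) rather than from an application. The snippet below is only an illustration of the keys read above; the concrete values are arbitrary examples.

import org.apache.spark.SparkConf

// Illustration only: the scheduling-related keys the Master reads at construction time.
val masterConf = new SparkConf()
  .set("spark.deploy.spreadOut", "false")   // pack executors onto as few workers as possible
  .set("spark.deploy.defaultCores", "8")    // per-app core cap when the app sets no spark.cores.max
  .set("spark.worker.timeout", "120")       // seconds of silence before a worker is considered lost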

5 The onStart() function

/*
1. Set up the Master web UI (plus the REST server and metrics systems)
2. Periodically send itself the CheckForWorkerTimeOut message
3. Set up the Master HA (high-availability) machinery: persistence engine and leader election
*/
    logInfo("Starting Spark master at " + masterUrl)
    logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
    webUi = new MasterWebUI(this, webUiPort)
    webUi.bind()
    masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort
    if (reverseProxy) {
      masterWebUiUrl = conf.get("spark.ui.reverseProxyUrl", masterWebUiUrl)
      webUi.addProxy()
      logInfo(s"Spark Master is acting as a reverse proxy. Master, Workers and " +
       s"Applications UIs are available at $masterWebUiUrl")
    }
    checkForWorkerTimeOutTask = forwardMessageThread.scheduleAtFixedRate(new Runnable {
      override def run(): Unit = Utils.tryLogNonFatalError {
        self.send(CheckForWorkerTimeOut)
      }
    }, 0, WORKER_TIMEOUT_MS, TimeUnit.MILLISECONDS)

    if (restServerEnabled) {
      val port = conf.getInt("spark.master.rest.port", 6066)
      restServer = Some(new StandaloneRestServer(address.host, port, conf, self, masterUrl))
    }
    restServerBoundPort = restServer.map(_.start())

    masterMetricsSystem.registerSource(masterSource)
    masterMetricsSystem.start()
    applicationMetricsSystem.start()
    // Attach the master and app metrics servlet handler to the web ui after the metrics systems are
    // started.
    masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
    applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)

    val serializer = new JavaSerializer(conf)
    val (persistenceEngine_, leaderElectionAgent_) = RECOVERY_MODE match {
      case "ZOOKEEPER" =>
        logInfo("Persisting recovery state to ZooKeeper")
        val zkFactory =
          new ZooKeeperRecoveryModeFactory(conf, serializer)
        (zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this))
      case "FILESYSTEM" =>
        val fsFactory =
          new FileSystemRecoveryModeFactory(conf, serializer)
        (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this))
      case "CUSTOM" =>
        val clazz = Utils.classForName(conf.get("spark.deploy.recoveryMode.factory"))
        val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serializer])
          .newInstance(conf, serializer)
          .asInstanceOf[StandaloneRecoveryModeFactory]
        (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this))
      case _ =>
        (new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this))
    }
    persistenceEngine = persistenceEngine_
    leaderElectionAgent = leaderElectionAgent_
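
For the ZOOKEEPER branch above to be taken, the Master must be started with the corresponding recovery settings. A sketch of those settings follows; the quorum address and znode directory are made-up examples, and in practice they are usually passed to the Master via SPARK_DAEMON_JAVA_OPTS or spark-defaults.conf rather than built programmatically.

import org.apache.spark.SparkConf

// Example values only; point the URL and directory at your own ZooKeeper deployment.
val haConf = new SparkConf()
  .set("spark.deploy.recoveryMode", "ZOOKEEPER")
  .set("spark.deploy.zookeeper.url", "zk1:2181,zk2:2181,zk3:2181")
  .set("spark.deploy.zookeeper.dir", "/spark")   // znode directory holding the persisted master state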

6 The Master message loop: the receive*() handlers

1. The CheckForWorkerTimeOut message

  1. The Master schedules this message to itself at a fixed rate (every WORKER_TIMEOUT_MS) as soon as it starts
  2. Using each worker's last heartbeat time, it decides whether the worker has timed out and, if so, removes it from the Master's in-memory state; with the defaults (spark.worker.timeout = 60), a silent worker is removed after 60 seconds, and a worker already marked DEAD is finally dropped after (REAPER_ITERATIONS + 1) * 60 s = 16 minutes
case CheckForWorkerTimeOut =>
      timeOutDeadWorkers()
private def timeOutDeadWorkers() {
    // Copy the workers into an array so we don't modify the hashset while iterating through it
    val currentTime = System.currentTimeMillis()
    // Based on the worker's last heartbeat time, decide whether it has timed out
    val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT_MS).toArray
    for (worker <- toRemove) {
      if (worker.state != WorkerState.DEAD) {
        logWarning("Removing %s because we got no heartbeat in %d seconds".format(
          worker.id, WORKER_TIMEOUT_MS / 1000))
        removeWorker(worker, s"Not receiving heartbeat for ${WORKER_TIMEOUT_MS / 1000} seconds")
      } else {
        if (worker.lastHeartbeat < currentTime - ((REAPER_ITERATIONS + 1) * WORKER_TIMEOUT_MS)) {
          workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it
        }
      }
    }
  }

2. The ElectedLeader message

  1. Read the persisted data (apps, drivers, workers) from the persistence engine
  2. If the persistence engine holds any data, set the Master's recovery state to RECOVERING
  3. While in RECOVERING state, restore the data stored in the persistence engine (beginRecovery)
  4. Schedule a CompleteRecovery message to itself after one worker-timeout interval

case ElectedLeader =>
      // 1. Read the persisted data (apps, drivers, workers) from the persistence engine
      val (storedApps, storedDrivers, storedWorkers) = persistenceEngine.readPersistedData(rpcEnv)
      // 2. If the persistence engine holds any data, mark the Master as RECOVERING
      state = if (storedApps.isEmpty && storedDrivers.isEmpty && storedWorkers.isEmpty) {
        RecoveryState.ALIVE
      } else{
        RecoveryState.RECOVERING
      }
      logInfo("I have been elected leader! New state: " + state)
      if (state == RecoveryState.RECOVERING) {
      // 3. While RECOVERING, restore the data held in the persistence engine
        beginRecovery(storedApps, storedDrivers, storedWorkers)
      // 4. Schedule a CompleteRecovery message to self after WORKER_TIMEOUT_MS
        recoveryCompletionTask = forwardMessageThread.schedule(new Runnable {
          override def run(): Unit = Utils.tryLogNonFatalError {
            self.send(CompleteRecovery)
          }
        }, WORKER_TIMEOUT_MS, TimeUnit.MILLISECONDS)
      }

1 beginRecovery()

  1. For each stored app
    • save the app's information in the Master's memory
    • send a MasterChanged message to the app's driver
  2. For each stored driver
    • add the driver's information to the Master's memory
  3. For each stored worker
    • save the worker's information in the Master's memory
    • send a MasterChanged message to the worker
private def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo],
      storedWorkers: Seq[WorkerInfo]) {
   /*
   1. For each stored app
      - save the app's information in the Master's memory
      - send a MasterChanged message to the app's driver
   */
    for (app <- storedApps) {
      logInfo("Trying to recover app: " + app.id)
      try {
        // Register the app: save its information in the Master's in-memory structures
        registerApplication(app)
        app.state = ApplicationState.UNKNOWN
        app.driver.send(MasterChanged(self, masterWebUiUrl))
      } catch {
        case e: Exception => logInfo("App " + app.id + " had exception on reconnect")
      }
    }
/*
2. For each stored driver
    - add the driver's information to the Master's memory
*/
    for (driver <- storedDrivers) {
      // Here we just read in the list of drivers. Any drivers associated with now-lost workers
      // will be re-launched when we detect that the worker is missing.
      drivers += driver
    }
/*
3. For each stored worker
    - save the worker's information in the Master's memory
    - send a MasterChanged message to the worker
*/
    for (worker <- storedWorkers) {
      logInfo("Trying to recover worker: " + worker.id)
      try {
        registerWorker(worker)
        worker.state = WorkerState.UNKNOWN
        worker.endpoint.send(MasterChanged(self, masterWebUiUrl))
      } catch {
        case e: Exception => logInfo("Worker " + worker.id + " had exception on reconnect")
      }
    }
  }

3 The CompleteRecovery message

  1. If the Master is not in RECOVERING state, return immediately (so recovery completes only once)
  2. Set the Master's recovery state to COMPLETING_RECOVERY
  3. Remove workers whose state is still UNKNOWN (they never responded)
  4. Remove apps whose state is still UNKNOWN
  5. Update recovered apps that are still WAITING to RUNNING
  6. Re-handle drivers that were not claimed by any worker
    1. find drivers that have not been assigned a worker
    2. if such a driver was submitted with supervise enabled, re-launch it
    3. otherwise remove it with an ERROR state
    4. finally mark the Master ALIVE and run the scheduler again
case CompleteRecovery => completeRecovery()
 private def completeRecovery() {
    // Ensure "only-once" recovery semantics using a short synchronization period.
    // If the Master is not in RECOVERING state, return immediately
    if (state != RecoveryState.RECOVERING) { return }
    // Move the recovery state to COMPLETING_RECOVERY
    state = RecoveryState.COMPLETING_RECOVERY

    // Kill off any workers and apps that didn't respond to us.
    // Remove workers that never responded (state still UNKNOWN)
    workers.filter(_.state == WorkerState.UNKNOWN).foreach(
      removeWorker(_, "Not responding for recovery"))
    // Remove apps that never responded (state still UNKNOWN)
    apps.filter(_.state == ApplicationState.UNKNOWN).foreach(finishApplication)

    // Update the state of recovered apps to RUNNING
    // Update recovered apps that are still WAITING to RUNNING
    apps.filter(_.state == ApplicationState.WAITING).foreach(_.state = ApplicationState.RUNNING)

    // Reschedule drivers which were not claimed by any workers
    /*
    1. Find drivers that were not claimed by any worker
    2. If the driver was submitted with supervise enabled, re-launch it
    3. Otherwise remove it with an ERROR state
    4. Afterwards mark the Master ALIVE and reschedule resources
    */
    drivers.filter(_.worker.isEmpty).foreach { d =>
      logWarning(s"Driver ${d.id} was not found after master recovery")
      if (d.desc.supervise) {
        logWarning(s"Re-launching ${d.id}")
        relaunchDriver(d)
      } else {
        removeDriver(d.id, DriverState.ERROR, None)
        logWarning(s"Did not re-launch ${d.id} because it was not supervised")
      }
    }

    state = RecoveryState.ALIVE
    // schedule() is called whenever a new app arrives or the available resources change
    schedule()
    logInfo("Recovery complete - resuming operations!")
  }
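
schedule() itself is not reproduced in this post. Roughly (a paraphrased sketch from the Spark 2.x source, not a verbatim copy; Random is scala.util.Random), it first places waiting drivers on ALIVE workers in round-robin order and then hands the remaining resources to startExecutorsOnWorkers(), which honours the spreadOutApps setting from section 4:

// Paraphrased sketch of Master.schedule(); see Master.scala for the authoritative version.
private def schedule(): Unit = {
    if (state != RecoveryState.ALIVE) {
      return   // only an ALIVE master hands out resources
    }
    // Drivers take strict precedence over executors
    val shuffledAliveWorkers = Random.shuffle(workers.toSeq.filter(_.state == WorkerState.ALIVE))
    val numWorkersAlive = shuffledAliveWorkers.size
    var curPos = 0
    for (driver <- waitingDrivers.toList) { // iterate over a copy of waitingDrivers
      var launched = false
      var numWorkersVisited = 0
      while (numWorkersVisited < numWorkersAlive && !launched) {
        val worker = shuffledAliveWorkers(curPos)
        numWorkersVisited += 1
        if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
          launchDriver(worker, driver)   // hand the driver to this worker
          waitingDrivers -= driver
          launched = true
        }
        curPos = (curPos + 1) % numWorkersAlive
      }
    }
    startExecutorsOnWorkers()   // then distribute executors across the workers
  }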

4 The RegisterWorker message

  1. If the Master is in STANDBY state, send a MasterInStandby message to the worker
  2. If the worker ID is already recorded in the Master's memory, send a RegisterWorkerFailed message to the worker
  3. Otherwise store the worker's information in the Master's in-memory structures
    1. persist the worker in the persistence engine
    2. send a RegisteredWorker message to the worker and run the scheduler
  4. If storing the worker fails (same address already registered), send a RegisterWorkerFailed message to the worker
case RegisterWorker(
      id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress) =>
      logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
        workerHost, workerPort, cores, Utils.megabytesToString(memory)))
      // If the Master is in STANDBY state, tell the worker with MasterInStandby
      if (state == RecoveryState.STANDBY) {
        workerRef.send(MasterInStandby)
      // If the worker ID is already registered, reply with RegisterWorkerFailed
      } else if (idToWorker.contains(id)) {
        workerRef.send(RegisterWorkerFailed("Duplicate worker ID"))
      } else {
        val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
          workerRef, workerWebUiUrl)
        // Store the worker's information in the Master's in-memory structures
        if (registerWorker(worker)) {
        // Persist the worker in the persistence engine
          persistenceEngine.addWorker(worker)
          // Reply to the worker with RegisteredWorker
          workerRef.send(RegisteredWorker(self, masterWebUiUrl, masterAddress))
          schedule()
        } else {
          val workerAddress = worker.endpoint.address
          logWarning("Worker registration failed. Attempted to re-register worker at same " +
            "address: " + workerAddress)
            // Registration failed (same address already registered); reply with RegisterWorkerFailed
          workerRef.send(RegisterWorkerFailed("Attempted to re-register worker at same address: "
            + workerAddress))
        }
      }
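
registerWorker() is called above but not shown in this post. Sketched roughly from the Spark 2.x source (treat the details as approximate and check Master.scala for the exact code), it drops stale DEAD records for the same host and port, rejects a duplicate registration from an address that is already alive, and otherwise fills the three worker structures from section 3:

// Paraphrased sketch of Master.registerWorker(), not a verbatim copy.
private def registerWorker(worker: WorkerInfo): Boolean = {
    // A restarted worker registers with a new ID; drop DEAD records left for the same host:port
    workers.filter { w =>
      (w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
    }.foreach { w => workers -= w }

    val workerAddress = worker.endpoint.address
    if (addressToWorker.contains(workerAddress)) {
      val oldWorker = addressToWorker(workerAddress)
      if (oldWorker.state == WorkerState.UNKNOWN) {
        // The old entry comes from recovery; the worker was restarted, so replace it
        removeWorker(oldWorker, "Worker replaced by a new worker with same address")
      } else {
        logInfo("Attempted to re-register worker at same address: " + workerAddress)
        return false
      }
    }

    workers += worker
    idToWorker(worker.id) = worker
    addressToWorker(workerAddress) = worker
    true
  }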

5 The RegisterApplication message

  1. If the Master is in STANDBY state, ignore the message
  2. Build an ApplicationInfo from the application description and the driver reference sent over
  3. Save the app's information in the Master's memory
  4. Persist the app in the persistence engine
  5. Send RegisteredApplication back to the driver to confirm the registration
  6. Run the Master's resource scheduler
case RegisterApplication(description, driver) =>
      // TODO Prevent repeated registrations from some driver
      if (state == RecoveryState.STANDBY) {
        // ignore, don't send response
      } else {
        logInfo("Registering app " + description.name)
        val app = createApplication(description, driver)
        registerApplication(app)
        logInfo("Registered app " + description.name + " with ID " + app.id)
        persistenceEngine.addApplication(app)
        driver.send(RegisteredApplication(app.id, self))
        schedule()
      }
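
registerApplication() is likewise only referenced here. A rough, paraphrased sketch (not a verbatim copy): it ignores a driver address that is already registered and otherwise fills the application structures from section 3, leaving schedule() to pick the app up from waitingApps:

// Paraphrased sketch of Master.registerApplication(), not a verbatim copy.
private def registerApplication(app: ApplicationInfo): Unit = {
    val appAddress = app.driver.address
    if (addressToApp.contains(appAddress)) {
      logInfo("Attempted to re-register application at same address: " + appAddress)
      return
    }
    applicationMetricsSystem.registerSource(app.appSource)   // expose the app in the metrics system
    apps += app
    idToApp(app.id) = app
    endpointToApp(app.driver) = app
    addressToApp(appAddress) = app
    waitingApps += app
  }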

6 The ExecutorStateChanged message

  1. Forward the state change to the driver as an ExecutorUpdated message
  2. If the executor has finished
    1. remove the executor from the app and from its worker
    2. if the app has exhausted its executor retries and has no running executors left, remove the whole app from the Master's memory
  3. Run the Master's resource scheduler
case ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
      // Look up the executor by appId and execId
      val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
      execOption match {
        case Some(exec) =>
          // Look up the app info
          val appInfo = idToApp(appId)
          val oldState = exec.state
          exec.state = state
          if (state == ExecutorState.RUNNING) {
            assert(oldState == ExecutorState.LAUNCHING,
              s"executor $execId state transfer from $oldState to RUNNING is illegal")
            appInfo.resetRetryCount()
          }
          // Notify the driver with an ExecutorUpdated message
          exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus, false))
          // If the executor has finished, remove it from the app and from its worker,
          // and possibly remove the whole app (see the retry check below)
          if (ExecutorState.isFinished(state)) {
            // Remove this executor from the worker and app
            logInfo(s"Removing executor ${exec.fullId} because it is $state")
            // If an application has already finished, preserve its
            // state to display its information properly on the UI
            // 
            if (!appInfo.isFinished) {
              appInfo.removeExecutor(exec)
            }
            // Remove the executor from its worker's in-memory record
            exec.worker.removeExecutor(exec)

            val normalExit = exitStatus == Some(0)
            // Only retry certain number of times so we don't go into an infinite loop.
            // Important note: this code path is not exercised by tests, so be very careful when
            // changing this `if` condition.
            if (!normalExit
                && appInfo.incrementRetryCount() >= MAX_EXECUTOR_RETRIES
                && MAX_EXECUTOR_RETRIES >= 0) { // < 0 disables this application-killing path
              val execs = appInfo.executors.values
              if (!execs.exists(_.state == ExecutorState.RUNNING)) {
                logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
                  s"${appInfo.retryCount} times; removing it")
                removeApplication(appInfo, ApplicationState.FAILED)
              }
            }
          }
          schedule()
        case None =>
          logWarning(s"Got status update for unknown executor $appId/$execId")
      }

1 removeApplication(): remove the app from the Master's memory

  1. Remove the app's information from the Master's in-memory structures
  2. Remove each of the app's executors
    1. remove the executor from the Master's memory
    2. send a KillExecutor message to the executor's worker
  3. Send an ApplicationRemoved message to the driver (unless the app finished normally)
  4. Remove the app from the persistence engine
  5. Run the Master's resource scheduler
  6. Send ApplicationFinished to every worker so they can clean up the app's state
 def removeApplication(app: ApplicationInfo, state: ApplicationState.Value) {
    if (apps.contains(app)) {
      logInfo("Removing app " + app.id)
      apps -= app
      idToApp -= app.id
      endpointToApp -= app.driver
      addressToApp -= app.driver.address
      // If the number of completed (historical) apps kept in memory reaches the configured limit
      // (RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200)),
      // drop the oldest entries
      if (completedApps.size >= RETAINED_APPLICATIONS) {//200
        val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1)
        completedApps.take(toRemove).foreach { a =>
          applicationMetricsSystem.removeSource(a.appSource)
        }
        completedApps.trimStart(toRemove)
      }
      completedApps += app // Remember it in our history
      waitingApps -= app
      /*
        1. Remove each executor from the Master's memory
        2. Send a KillExecutor message to the executor's worker */
      for (exec <- app.executors.values) {
        killExecutor(exec)
      }
      // Mark the app with its final state (FAILED on this code path)
      app.markFinished(state)
      if (state != ApplicationState.FINISHED) {
      // Send an ApplicationRemoved message to the driver
        app.driver.send(ApplicationRemoved(state.toString))
      }
      // Remove the app from the persistence engine
      persistenceEngine.removeApplication(app)
      // Run the Master's resource scheduler
      schedule()

      // Tell all workers that the application has finished, so they can clean up any app state.
      // Send an ApplicationFinished message to every worker
      workers.foreach { w => w.endpoint.send(ApplicationFinished(app.id))
      }
    }
  }

2 killExecutor(): remove the executor from the Master's memory

  1. Remove the executor from the Master's in-memory structures
  2. Send a KillExecutor message to the executor's worker
private def killExecutor(exec: ExecutorDesc): Unit = {
    // Remove the executor from its worker's in-memory record
    exec.worker.removeExecutor(exec)
    // Ask the worker to kill the executor
    exec.worker.endpoint.send(KillExecutor(masterUrl, exec.application.id, exec.id))
    // Mark the executor as KILLED
    exec.state = ExecutorState.KILLED
}

7 The DriverStateChanged message

  1. If the driver's state is ERROR, FINISHED, KILLED or FAILED, remove the driver
  2. Remove the driver from the Master's memory and from the persistence engine
  3. Run the Master's resource scheduler
case DriverStateChanged(driverId, state, exception) =>
    state match {
       case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED =>
         removeDriver(driverId, state, exception)
       case _ =>
         throw new Exception(s"Received unexpected state update for driver $driverId: $state")
    }
private def removeDriver(
      driverId: String,
      finalState: DriverState,
      exception: Option[Exception]) {
    // If the Master's memory contains this driver
    drivers.find(d => d.id == driverId) match {
      case Some(driver) =>
        logInfo(s"Removing driver: $driverId")
        drivers -= driver
        // Keep only a bounded history of completed drivers
        if (completedDrivers.size >= RETAINED_DRIVERS) {
          val toRemove = math.max(RETAINED_DRIVERS / 10, 1)
          completedDrivers.trimStart(toRemove)
        }
        completedDrivers += driver
        // Remove the driver from the persistence engine
        persistenceEngine.removeDriver(driver)
        driver.state = finalState
        driver.exception = exception
        // Remove the driver from its worker's in-memory record
        driver.worker.foreach(w => w.removeDriver(driver))
        // Run the Master's resource scheduler
        schedule()
      case None =>
        logWarning(s"Asked to remove unknown driver: $driverId")
    }
  }
}

8 Heartbeat: a heartbeat from a worker

  1. If the worker is registered in the Master's memory, update its last heartbeat time
  2. If the worker ID is missing from idToWorker but still present in the workers set, ask the worker to re-register by sending it a ReconnectWorker message; a heartbeat from a completely unknown worker is ignored
case Heartbeat(workerId, worker) =>
      idToWorker.get(workerId) match {
        case Some(workerInfo) =>
          workerInfo.lastHeartbeat = System.currentTimeMillis()
        case None =>
          if (workers.map(_.id).contains(workerId)) {
            logWarning(s"Got heartbeat from unregistered worker $workerId." +
              " Asking it to re-register.")
            worker.send(ReconnectWorker(masterUrl))
          } else {
            logWarning(s"Got heartbeat from unregistered worker $workerId." +
              " This worker was never registered, so ignoring the heartbeat.")
          }
      }

9 The MasterChangeAcknowledged message

  1. If the app is found in the Master's memory, mark its state as WAITING
case MasterChangeAcknowledged(appId) =>
      // If the app is found in the Master's memory, mark its state as WAITING
      idToApp.get(appId) match {
        case Some(app) =>
          logInfo("Application has been re-registered: " + appId)
          app.state = ApplicationState.WAITING
        case None =>
          logWarning("Master change ack from unknown app: " + appId)
      }

10 WorkerSchedulerStateResponse: sent by a worker during recovery

  1. Update the executor information in the Master's memory
  2. Update the driver information in the Master's memory
case WorkerSchedulerStateResponse(workerId, executors, driverIds) =>
      // Check whether the Master knows about this worker
      idToWorker.get(workerId) match {
        case Some(worker) =>
          logInfo("Worker has been re-registered: " + workerId)
          // Mark the worker as ALIVE
          worker.state = WorkerState.ALIVE
          // Keep only the executors whose app is still known to the Master
          val validExecutors = executors.filter(exec => idToApp.get(exec.appId).isDefined)
          // Re-attach each valid executor to its app and worker
          for (exec <- validExecutors) {
            // Look up the app this executor belongs to
            val app = idToApp.get(exec.appId).get
            val execInfo = app.addExecutor(worker, exec.cores, Some(exec.execId))
            worker.addExecutor(execInfo)
            execInfo.copyState(exec)
          }
          // Update the Master's driver information
          for (driverId <- driverIds) {
            drivers.find(_.id == driverId).foreach { driver =>
              driver.worker = Some(worker)
              driver.state = DriverState.RUNNING
              worker.addDriver(driver)
            }
          }
        case None =>
          logWarning("Scheduler state from unknown worker: " + workerId)
      }

11 WorkerLatestState: the latest state reported by a worker

  1. For each executor reported by the worker that the Master does not recognize, send the worker a KillExecutor message
  2. For each driver reported by the worker that the Master does not recognize, send the worker a KillDriver message
 case WorkerLatestState(workerId, executors, driverIds) =>
      idToWorker.get(workerId) match {
        case Some(worker) =>
          for (exec <- executors) {
            val executorMatches = worker.executors.exists {
              case (_, e) => e.application.id == exec.appId && e.id == exec.execId
            }
            if (!executorMatches) {
              // master doesn't recognize this executor. So just tell worker to kill it.
              worker.endpoint.send(KillExecutor(masterUrl, exec.appId, exec.execId))
            }
          }

          for (driverId <- driverIds) {
            val driverMatches = worker.drivers.exists { case (id, _) => id == driverId }
            if (!driverMatches) {
              // master doesn't recognize this driver. So just tell worker to kill it.
              worker.endpoint.send(KillDriver(driverId))
            }
          }
        case None =>
          logWarning("Worker state from unknown worker: " + workerId)
      }

12 The UnregisterApplication message

  1. Remove the app and its driver references from the Master's memory
  2. killExecutor(): remove each executor from the Master's memory and send a KillExecutor message to its worker
  3. Send an ApplicationRemoved message to the driver
  4. Remove the app from the persistence engine
  5. Run the Master's resource scheduler
case UnregisterApplication(applicationId) =>
      logInfo(s"Received unregister request from application $applicationId")
      idToApp.get(applicationId).foreach(finishApplication)

 private def finishApplication(app: ApplicationInfo) {
    removeApplication(app, ApplicationState.FINISHED)
  }
  def removeApplication(app: ApplicationInfo, state: ApplicationState.Value) {
    if (apps.contains(app)) {
      logInfo("Removing app " + app.id)
      // Remove the app and its driver references from the Master's memory
      apps -= app
      idToApp -= app.id
      endpointToApp -= app.driver
      addressToApp -= app.driver.address

      // Trim the completed-apps history and its metrics sources
      if (completedApps.size >= RETAINED_APPLICATIONS) {
        val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1)
        completedApps.take(toRemove).foreach { a =>
          applicationMetricsSystem.removeSource(a.appSource)
        }
        completedApps.trimStart(toRemove)
      }
      completedApps += app // Remember it in our history
      waitingApps -= app

      // For each executor of the app: remove it from the Master's memory and send KillExecutor to its worker
      for (exec <- app.executors.values) {
        killExecutor(exec)
      }
      // Mark the app finished and, unless it finished normally, notify the driver with ApplicationRemoved
      app.markFinished(state)
      if (state != ApplicationState.FINISHED) {
        app.driver.send(ApplicationRemoved(state.toString))
      }
      // Remove the app from the persistence engine
      persistenceEngine.removeApplication(app)
      // Run the Master's resource scheduler
      schedule()

      // Tell all workers that the application has finished, so they can clean up any app state.
      workers.foreach { w =>
        w.endpoint.send(ApplicationFinished(app.id))
      }
    }
  }

13 RequestSubmitDriver: a request to submit a driver

  1. If the Master is not ALIVE, reply with a failed SubmitDriverResponse
  2. Otherwise persist the driver in the persistence engine
  3. Save the driver in the Master's memory (waitingDrivers and drivers)
  4. Run the Master's resource scheduler
  5. Reply with a successful SubmitDriverResponse
case RequestSubmitDriver(description) =>
      if (state != RecoveryState.ALIVE) {
        val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
          "Can only accept driver submissions in ALIVE state."
        // The second argument (success = false) marks the submission as failed
        context.reply(SubmitDriverResponse(self, false, None, msg))
      } else {
        logInfo("Driver submitted " + description.command.mainClass)
        val driver = createDriver(description)
        persistenceEngine.addDriver(driver)
        waitingDrivers += driver
        drivers.add(driver)
        schedule()

        // TODO: It might be good to instead have the submission client poll the master to determine
        //       the current status of the driver. For now it's simply "fire and forget".

        context.reply(SubmitDriverResponse(self, true, Some(driver.id),
          s"Driver successfully submitted as ${driver.id}"))
      }

14 RequestKillDriver: a request to kill a driver

  1. If the Master is not ALIVE, reply with a failed KillDriverResponse
  2. If the driver cannot be found in the Master's memory, reply with a failed KillDriverResponse
  3. If the driver is still in the waiting queue, remove it and send a DriverStateChanged(KILLED) message to self
  4. Otherwise send a KillDriver message to the worker the driver runs on; in both cases reply with a successful KillDriverResponse
case RequestKillDriver(driverId) =>
      if (state != RecoveryState.ALIVE) {
        val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
          s"Can only kill drivers in ALIVE state."
        context.reply(KillDriverResponse(self, driverId, success = false, msg))
      } else {
        logInfo("Asked to kill driver " + driverId)
        val driver = drivers.find(_.id == driverId)
        driver match {
          case Some(d) =>
            if (waitingDrivers.contains(d)) {
              waitingDrivers -= d
              self.send(DriverStateChanged(driverId, DriverState.KILLED, None))
            } else {
              // We just notify the worker to kill the driver here. The final bookkeeping occurs
              // on the return path when the worker submits a state change back to the master
              // to notify it that the driver was successfully killed.
              d.worker.foreach { w =>
                w.endpoint.send(KillDriver(driverId))
              }
            }
            // TODO: It would be nice for this to be a synchronous response
            val msg = s"Kill request for $driverId submitted"
            logInfo(msg)
            context.reply(KillDriverResponse(self, driverId, success = true, msg))
          case None =>
            val msg = s"Driver $driverId has already finished or does not exist"
            logWarning(msg)
            context.reply(KillDriverResponse(self, driverId, success = false, msg))
        }
      }

15 RequestDriverStatus: query the status of a driver

  1. If the Master is not ALIVE, reply with DriverStatusResponse(found = false) carrying an exception
  2. If the driver is found among the running or completed drivers, reply with DriverStatusResponse(found = true) and the driver's details
  3. Otherwise reply with DriverStatusResponse(found = false)
case RequestDriverStatus(driverId) =>
      if (state != RecoveryState.ALIVE) {
        val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
          "Can only request driver status in ALIVE state."
        context.reply(
          DriverStatusResponse(found = false, None, None, None, Some(new Exception(msg))))
      } else {
        (drivers ++ completedDrivers).find(_.id == driverId) match {
          case Some(driver) =>
            context.reply(DriverStatusResponse(found = true, Some(driver.state),
              driver.worker.map(_.id), driver.worker.map(_.hostPort), driver.exception))
          case None =>
            context.reply(DriverStatusResponse(found = false, None, None, None, None))
        }
      }

16 RequestMasterState: query the Master's state

  1. Reply with a MasterStateResponse message carrying the workers, the running and completed apps, the running and completed drivers, and the Master's recovery state
case RequestMasterState =>
      context.reply(MasterStateResponse(
        address.host, address.port, restServerBoundPort,
        workers.toArray, apps.toArray, completedApps.toArray,
        drivers.toArray, completedDrivers.toArray, state))
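
The same information that MasterStateResponse carries is also exposed by the standalone Master web UI as JSON, which is often the quickest way to look at it. A small sketch; the host name is an assumption for your deployment, and the port is the default web UI port seen in start-master.sh:

import scala.io.Source

// Fetch the Master's state as JSON from its web UI.
val json = Source.fromURL("http://master-host:8080/json").mkString
println(json)   // workers, running and completed apps, drivers, and the master status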

17 RequestExecutors: adjust the total number of executors for an app

  1. Set the app's new executor limit in the Master
  2. Run the Master's resource scheduler
case RequestExecutors(appId, requestedTotal) =>
      context.reply(handleRequestExecutors(appId, requestedTotal))

private def handleRequestExecutors(appId: String, requestedTotal: Int): Boolean = {
    idToApp.get(appId) match {
      case Some(appInfo) =>
        logInfo(s"Application $appId requested to set total executors to $requestedTotal.")
        // Set the app's new executor limit
        appInfo.executorLimit = requestedTotal
        schedule()
        true
      case None =>
        logWarning(s"Unknown application $appId requested $requestedTotal total executors.")
        false
    }
  }

18 KillExecutors: kill specific executors of an app

  1. If the app is known to the Master, split the requested executor IDs into known and unknown ones and remove the known executors from the app's data structures
  2. Remove each of those executors from the Master's memory
  3. Send a KillExecutor message to the executor's worker
  4. Run the Master's resource scheduler
 case KillExecutors(appId, executorIds) =>
      val formattedExecutorIds = formatExecutorIds(executorIds)
      context.reply(handleKillExecutors(appId, formattedExecutorIds))

private def handleKillExecutors(appId: String, executorIds: Seq[Int]): Boolean = {
    idToApp.get(appId) match {
      case Some(appInfo) =>
        logInfo(s"Application $appId requests to kill executors: " + executorIds.mkString(", "))
        val (known, unknown) = executorIds.partition(appInfo.executors.contains)
        known.foreach { executorId =>
          val desc = appInfo.executors(executorId)
          appInfo.removeExecutor(desc)
          killExecutor(desc)
        }
        if (unknown.nonEmpty) {
          logWarning(s"Application $appId attempted to kill non-existent executors: "
            + unknown.mkString(", "))
        }
        schedule()
        true
      case None =>
        logWarning(s"Unregistered application $appId requested us to kill executors!")
        false
    }
  }
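
On the application side, the RequestExecutors and KillExecutors messages of sections 17 and 18 originate from SparkContext's developer API; the calls travel through the scheduler backend and the standalone app client before reaching the Master. A minimal sketch, assuming a standalone master URL and some example executor IDs:

import org.apache.spark.{SparkConf, SparkContext}

// Assumed master URL; replace with your own standalone master.
val conf = new SparkConf().setAppName("resize-demo").setMaster("spark://master-host:7077")
val sc = new SparkContext(conf)

sc.requestTotalExecutors(4, 0, Map.empty)   // arrives at the Master as RequestExecutors(appId, 4)
sc.killExecutors(Seq("1", "2"))             // arrives at the Master as KillExecutors(appId, ...)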

Reposted from blog.csdn.net/tanliqing2010/article/details/80398767