- 1 start-master.sh script analysis
- 2 Master class hierarchy
- 3 Data structures for workers, applications, and drivers
- 4 Reading the configuration
- 5 The onStart() function
- 6 The master message loop: the receive*() functions
- 1. CheckForWorkerTimeOut
- 2. ElectedLeader
- 3. CompleteRecovery
- 4. RegisterWorker
- 5. RegisterApplication
- 6. ExecutorStateChanged: executor state change
- 7. DriverStateChanged
- 8. Heartbeat: worker heartbeat
- 9. MasterChangeAcknowledged
- 10. WorkerSchedulerStateResponse: sent by a worker
- 11. WorkerLatestState: the worker's latest state
- 12. UnregisterApplication: unregister an application
- 13. RequestSubmitDriver: request to submit a driver
- 14. RequestKillDriver: request to kill a driver
- 15. RequestDriverStatus: query a driver's status
- 16. RequestMasterState: query the master's state
- 17. RequestExecutors: request a change in executor count
- 18. KillExecutors: kill executors
1 start-master.sh script analysis
Running the start-master.sh script invokes spark-daemon.sh, which in turn ends up invoking the spark-class script.
## start-master.sh
# Export the SPARK_HOME environment variable
if [ -z "${SPARK_HOME}" ]; then
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
# The main class to launch
CLASS="org.apache.spark.deploy.master.Master"
ORIGINAL_ARGS="$@"
. "${SPARK_HOME}/sbin/spark-config.sh"
. "${SPARK_HOME}/bin/load-spark-env.sh"
# The master's default RPC port is 7077
if [ "$SPARK_MASTER_PORT" = "" ]; then
SPARK_MASTER_PORT=7077
fi
# Determine the host name
if [ "$SPARK_MASTER_HOST" = "" ]; then
case `uname` in
(SunOS)
SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`"
;;
(*)
SPARK_MASTER_HOST="`hostname -f`"
;;
esac
fi
if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
SPARK_MASTER_WEBUI_PORT=8080
fi
# Finally delegate to spark-daemon.sh
"${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS 1 \
--host $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \
$ORIGINAL_ARGS
## spark-daemon.sh
if [ -z "${SPARK_HOME}" ]; then
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
. "${SPARK_HOME}/sbin/spark-config.sh"
# start
option=$1
shift
# org.apache.spark.deploy.master.Master
command=$1
shift
# 1
instance=$1
shift
spark_rotate_log ()
{
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
. "${SPARK_HOME}/bin/load-spark-env.sh"
if [ "$SPARK_IDENT_STRING" = "" ]; then
export SPARK_IDENT_STRING="$USER"
fi
export SPARK_PRINT_LAUNCH_COMMAND="1"
# get log directory
if [ "$SPARK_LOG_DIR" = "" ]; then
export SPARK_LOG_DIR="${SPARK_HOME}/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
rm -f "$SPARK_LOG_DIR"/.spark_test
else
chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
fi
if [ "$SPARK_PID_DIR" = "" ]; then
SPARK_PID_DIR=/tmp
fi
# some variables
log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid"
# Set default scheduling priority
if [ "$SPARK_NICENESS" = "" ]; then
export SPARK_NICENESS=0
fi
# Launch the given command in the background, record its pid, and verify the process actually came up
execute_command() {
if [ -z ${SPARK_NO_DAEMONIZE+set} ]; then
nohup -- "$@" >> $log 2>&1 < /dev/null &
newpid="$!"
echo "$newpid" > "$pid"
# Poll for up to 5 seconds for the java process to start
for i in {1..10}
do
if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
break
fi
sleep 0.5
done
sleep 2
# Check if the process has died; in that case we'll tail the log so the user can see
if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
echo "failed to launch: $@"
tail -10 "$log" | sed 's/^/ /'
echo "full log in $log"
fi
else
"$@"
fi
}
run_command() {
mode="$1"
shift
mkdir -p "$SPARK_PID_DIR"
if [ -f "$pid" ]; then
TARGET_ID="$(cat "$pid")"
if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
echo "$command running as process $TARGET_ID. Stop it first."
exit 1
fi
fi
if [ "$SPARK_MASTER" != "" ]; then
echo rsync from "$SPARK_MASTER"
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
fi
spark_rotate_log "$log"
echo "starting $command, logging to $log"
case "$mode" in
(class)
execute_command nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class "$command" "$@"
;;
(submit)
execute_command nice -n "$SPARK_NICENESS" bash "${SPARK_HOME}"/bin/spark-submit --class "$command" "$@"
;;
(*)
echo "unknown mode: $mode"
exit 1
;;
esac
}
case $option in
(submit)
run_command submit "$@"
;;
(start)
###
###
run_command class "$@"
;;
(stop)
if [ -f $pid ]; then
TARGET_ID="$(cat "$pid")"
if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
echo "stopping $command"
kill "$TARGET_ID" && rm -f "$pid"
else
echo "no $command to stop"
fi
else
echo "no $command to stop"
fi
;;
(status)
if [ -f $pid ]; then
TARGET_ID="$(cat "$pid")"
if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
echo $command is running.
exit 0
else
echo $pid file is present but $command not running
exit 1
fi
else
echo $command not running.
exit 2
fi
;;
(*)
echo $usage
exit 1
;;
esac
The call chain: start-master.sh -> spark-daemon.sh start org.apache.spark.deploy.master.Master 1 ... -> spark-class ...Master
2. spark-class launches the JVM through the launcher entry point
java org.apache.spark.launcher.Main org.apache.spark.deploy.master.Master
// The org.apache.spark.deploy.master.Master companion object
private[deploy] object Master extends Logging {
val SYSTEM_NAME = "sparkMaster"
val ENDPOINT_NAME = "Master"
def main(argStrings: Array[String]) {
Thread.setDefaultUncaughtExceptionHandler(new SparkUncaughtExceptionHandler(
exitOnUncaughtException = false))
Utils.initDaemon(log)
val conf = new SparkConf
val args = new MasterArguments(argStrings, conf)
val (rpcEnv, _, _) = startRpcEnvAndEndpoint(args.host, args.port, args.webUiPort, conf)
rpcEnv.awaitTermination()
}
/**
* Start the Master and return a three tuple of:
* (1) The Master RpcEnv
* (2) The web UI bound port
* (3) The REST server bound port, if any
*/
/*
- Create the RPC environment
- Create the Master endpoint
*/
def startRpcEnvAndEndpoint(
host: String,
port: Int,
webUiPort: Int,
conf: SparkConf): (RpcEnv, Int, Option[Int]) = {
val securityMgr = new SecurityManager(conf)
val rpcEnv = RpcEnv.create(SYSTEM_NAME, host, port, conf, securityMgr)
val masterEndpoint = rpcEnv.setupEndpoint(ENDPOINT_NAME,
new Master(rpcEnv, rpcEnv.address, webUiPort, securityMgr, conf))
val portsResponse = masterEndpoint.askSync[BoundPortsResponse](BoundPortsRequest)
(rpcEnv, portsResponse.webUIPort, portsResponse.restPort)
}
}
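startRpcEnvAndEndpoint can also be driven directly, which makes the wiring easy to see. A minimal sketch (host and ports here are arbitrary assumptions; the object is placed in org.apache.spark.deploy so the private[deploy] members are visible):
package org.apache.spark.deploy

import org.apache.spark.SparkConf
import org.apache.spark.deploy.master.Master

// Sketch only: start a Master in-process and print the ports it bound.
object MasterLaunchSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    val (rpcEnv, webUiPort, restPort) =
      Master.startRpcEnvAndEndpoint("127.0.0.1", 7077, 8080, conf)
    println(s"Master web UI bound to port $webUiPort, REST server: $restPort")
    rpcEnv.awaitTermination() // block like main() does; use rpcEnv.shutdown() to stop instead
  }
}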
While constructing the Master instance, it:
- 1. creates the data structures that hold worker, application, and driver information
- 2. reads the configuration
2 Master class hierarchy
// The Master is an RPC endpoint and also implements LeaderElectable, i.e. the HA mechanism
// RPC endpoint life cycle: constructor -> onStart -> receive* -> onStop
private[deploy] class Master(
override val rpcEnv: RpcEnv,
address: RpcAddress,
webUiPort: Int,
val securityMgr: SecurityManager,
val conf: SparkConf)
extends ThreadSafeRpcEndpoint with Logging with LeaderElectable
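The life-cycle comment above is the key to reading the rest of this class. As a rough sketch (not Spark's actual trait; the real org.apache.spark.rpc.RpcEndpoint passes an RpcCallContext for replying), an endpoint looks like this:
// Simplified stand-in for org.apache.spark.rpc.RpcEndpoint (sketch only):
trait EndpointLifecycleSketch {
  def onStart(): Unit = {}                        // called once, right after construction
  def receive: PartialFunction[Any, Unit] =       // one-way messages (endpointRef.send)
    PartialFunction.empty
  def receiveAndReply(reply: Any => Unit): PartialFunction[Any, Unit] =
    PartialFunction.empty                         // request/response messages (ask/askSync)
  def onStop(): Unit = {}                         // called once, when the endpoint is unregistered
}
Because Master mixes in ThreadSafeRpcEndpoint, messages are processed one at a time, which is why the mutable HashMaps in the next section need no extra locking.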
3 Data structures for workers, applications, and drivers
/*
1. Track worker information
2. Track application information
3. Track driver information
*/
val idToApp = new HashMap[String, ApplicationInfo]
private val waitingApps = new ArrayBuffer[ApplicationInfo]
val apps = new HashSet[ApplicationInfo]
private val endpointToApp = new HashMap[RpcEndpointRef, ApplicationInfo]
private val addressToApp = new HashMap[RpcAddress, ApplicationInfo]
private val completedApps = new ArrayBuffer[ApplicationInfo]
private var nextAppNumber = 0
val workers = new HashSet[WorkerInfo]
private val idToWorker = new HashMap[String, WorkerInfo]
private val addressToWorker = new HashMap[RpcAddress, WorkerInfo]
private val drivers = new HashSet[DriverInfo]
private val completedDrivers = new ArrayBuffer[DriverInfo]
private val waitingDrivers = new ArrayBuffer[DriverInfo]
4 Reading the configuration
// spark.deploy.spreadOut: when assigning executors, the default is to spread them
// round-robin across the worker nodes; when false, one worker is filled up before
// moving on to the next (a toy illustration follows this block)
private val spreadOutApps = conf.getBoolean("spark.deploy.spreadOut", true)
// If an application does not specify how many cores it wants, it takes all available cores
private val defaultCores = conf.getInt("spark.deploy.defaultCores", Int.MaxValue)
val reverseProxy = conf.getBoolean("spark.ui.reverseProxy", false)
if (defaultCores < 1) {
throw new SparkException("spark.deploy.defaultCores must be positive")
}
private val WORKER_TIMEOUT_MS = conf.getLong("spark.worker.timeout", 60) * 1000
private val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200)
private val RETAINED_DRIVERS = conf.getInt("spark.deploy.retainedDrivers", 200)
private val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15)
private val RECOVERY_MODE = conf.get("spark.deploy.recoveryMode", "NONE")
private val MAX_EXECUTOR_RETRIES = conf.getInt("spark.deploy.maxExecutorRetries", 10)
// Alternative application submission gateway that is stable across Spark versions
private val restServerEnabled = conf.getBoolean("spark.master.rest.enabled", true)
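To see what spark.deploy.spreadOut changes in practice, here is a toy core-assignment loop. It is a simplification of the idea behind Master.scheduleExecutorsOnWorkers, not the real algorithm:
// Toy sketch: distribute `needed` cores over workers that have `free` cores each.
def assignCores(free: Array[Int], needed: Int, spreadOut: Boolean): Array[Int] = {
  val assigned = Array.fill(free.length)(0)
  var left = needed
  var pos = 0
  while (left > 0 && free.exists(_ > 0)) {
    if (free(pos) > 0) {
      free(pos) -= 1; assigned(pos) += 1; left -= 1
    }
    // spreadOut: always move on to the next worker; otherwise stay until this one is full
    if (spreadOut || free(pos) == 0) pos = (pos + 1) % free.length
  }
  assigned
}
// assignCores(Array(4, 4, 4), 6, spreadOut = true)  => Array(2, 2, 2)
// assignCores(Array(4, 4, 4), 6, spreadOut = false) => Array(4, 2, 0)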
5 The onStart() function
/*
1. Set up the master web UI
2. Periodically send a CheckForWorkerTimeOut message to self
3. Set up the master HA (high-availability) machinery
*/
logInfo("Starting Spark master at " + masterUrl)
logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
webUi = new MasterWebUI(this, webUiPort)
webUi.bind()
masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort
if (reverseProxy) {
masterWebUiUrl = conf.get("spark.ui.reverseProxyUrl", masterWebUiUrl)
webUi.addProxy()
logInfo(s"Spark Master is acting as a reverse proxy. Master, Workers and " +
s"Applications UIs are available at $masterWebUiUrl")
}
checkForWorkerTimeOutTask = forwardMessageThread.scheduleAtFixedRate(new Runnable {
override def run(): Unit = Utils.tryLogNonFatalError {
self.send(CheckForWorkerTimeOut)
}
}, 0, WORKER_TIMEOUT_MS, TimeUnit.MILLISECONDS)
if (restServerEnabled) {
val port = conf.getInt("spark.master.rest.port", 6066)
restServer = Some(new StandaloneRestServer(address.host, port, conf, self, masterUrl))
}
restServerBoundPort = restServer.map(_.start())
masterMetricsSystem.registerSource(masterSource)
masterMetricsSystem.start()
applicationMetricsSystem.start()
// Attach the master and app metrics servlet handler to the web ui after the metrics systems are
// started.
masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler)
val serializer = new JavaSerializer(conf)
val (persistenceEngine_, leaderElectionAgent_) = RECOVERY_MODE match {
case "ZOOKEEPER" =>
logInfo("Persisting recovery state to ZooKeeper")
val zkFactory =
new ZooKeeperRecoveryModeFactory(conf, serializer)
(zkFactory.createPersistenceEngine(), zkFactory.createLeaderElectionAgent(this))
case "FILESYSTEM" =>
val fsFactory =
new FileSystemRecoveryModeFactory(conf, serializer)
(fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this))
case "CUSTOM" =>
val clazz = Utils.classForName(conf.get("spark.deploy.recoveryMode.factory"))
val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serializer])
.newInstance(conf, serializer)
.asInstanceOf[StandaloneRecoveryModeFactory]
(factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this))
case _ =>
(new BlackHolePersistenceEngine(), new MonarchyLeaderAgent(this))
}
persistenceEngine = persistenceEngine_
leaderElectionAgent = leaderElectionAgent_
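The CUSTOM branch is an extension point: spark.deploy.recoveryMode.factory names a user class with a (SparkConf, Serializer) constructor. A skeletal sketch of such a factory (the trivial bodies are placeholders that reuse the no-op defaults seen in the fall-through branch above):
package org.apache.spark.deploy.master

import org.apache.spark.SparkConf
import org.apache.spark.serializer.Serializer

// Sketch of a pluggable factory for "CUSTOM" recovery mode.
class MyRecoveryModeFactory(conf: SparkConf, serializer: Serializer)
  extends StandaloneRecoveryModeFactory(conf, serializer) {

  // A real implementation would persist apps/drivers/workers to external storage.
  override def createPersistenceEngine(): PersistenceEngine =
    new BlackHolePersistenceEngine()

  // A real implementation would run distributed leader election (cf. ZooKeeper mode).
  override def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent =
    new MonarchyLeaderAgent(master)
}
It would be enabled with spark.deploy.recoveryMode=CUSTOM plus the factory's fully qualified class name in spark.deploy.recoveryMode.factory.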
6 The master message loop: the receive*() functions
1. The CheckForWorkerTimeOut message
- When the master starts, it schedules this message to itself at a fixed interval (see onStart() above).
- Using each worker's last heartbeat time, the master decides whether the worker has timed out; timed-out workers are removed from the master's in-memory state.
case CheckForWorkerTimeOut =>
timeOutDeadWorkers()
private def timeOutDeadWorkers() {
// Copy the workers into an array so we don't modify the hashset while iterating through it
val currentTime = System.currentTimeMillis()
// A worker has timed out if its last heartbeat is older than WORKER_TIMEOUT_MS ago
val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT_MS).toArray
for (worker <- toRemove) {
if (worker.state != WorkerState.DEAD) {
logWarning("Removing %s because we got no heartbeat in %d seconds".format(
worker.id, WORKER_TIMEOUT_MS / 1000))
removeWorker(worker, s"Not receiving heartbeat for ${WORKER_TIMEOUT_MS / 1000} seconds")
} else {
if (worker.lastHeartbeat < currentTime - ((REAPER_ITERATIONS + 1) * WORKER_TIMEOUT_MS)) {
workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it
}
}
}
}
2. The ElectedLeader message
- Read the persisted data (apps, drivers, workers) from the persistence engine.
- If the persistence engine holds any data, set the master's recovery state to RECOVERING.
- While in RECOVERING state, restore the data held by the persistence engine.
- Schedule a CompleteRecovery message to self after one worker-timeout interval.
case ElectedLeader =>
// 1. Read the persisted data (apps, drivers, workers) from the persistence engine
val (storedApps, storedDrivers, storedWorkers) = persistenceEngine.readPersistedData(rpcEnv)
// 2. If the persistence engine holds any data, move the master into RECOVERING state
state = if (storedApps.isEmpty && storedDrivers.isEmpty && storedWorkers.isEmpty) {
RecoveryState.ALIVE
} else {
RecoveryState.RECOVERING
}
logInfo("I have been elected leader! New state: " + state)
if (state == RecoveryState.RECOVERING) {
// 3. While in RECOVERING state, restore the data held by the persistence engine
beginRecovery(storedApps, storedDrivers, storedWorkers)
// 4. Schedule a CompleteRecovery message to self
recoveryCompletionTask = forwardMessageThread.schedule(new Runnable {
override def run(): Unit = Utils.tryLogNonFatalError {
self.send(CompleteRecovery)
}
}, WORKER_TIMEOUT_MS, TimeUnit.MILLISECONDS)
}
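readPersistedData comes from the PersistenceEngine abstraction chosen in onStart(). Reduced to its core, the contract looks roughly like this (a sketch after org.apache.spark.deploy.master.PersistenceEngine; treat the details as assumptions):
import scala.reflect.ClassTag
import org.apache.spark.rpc.RpcEnv

// Sketch of the PersistenceEngine contract behind readPersistedData above.
abstract class PersistenceEngineSketch {
  def persist(name: String, obj: AnyRef): Unit   // store one object under a name
  def unpersist(name: String): Unit              // remove it
  def read[T: ClassTag](prefix: String): Seq[T]  // load all objects whose name starts with prefix

  // Apps, drivers and workers are stored under well-known name prefixes, so recovery
  // is just three prefix scans (the real method also wraps this in rpcEnv.deserialize
  // so that endpoint references resolve correctly):
  final def readPersistedData(rpcEnv: RpcEnv)
      : (Seq[ApplicationInfo], Seq[DriverInfo], Seq[WorkerInfo]) =
    (read[ApplicationInfo]("app_"), read[DriverInfo]("driver_"), read[WorkerInfo]("worker_"))
}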
1 beginRecovery()
- Loop over all stored apps:
  - save each app's info in the master's memory
  - send each app's driver a MasterChanged message
- Loop over all stored drivers:
  - add each driver's info to the master's memory
- Loop over all stored workers:
  - add each worker's info to the master's memory
  - send each worker a MasterChanged message
private def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo],
storedWorkers: Seq[WorkerInfo]) {
/*
1. Loop over all apps:
   - save each app's info in the master's memory
   - send each app's driver a MasterChanged message
*/
for (app <- storedApps) {
logInfo("Trying to recover app: " + app.id)
try {
// Save the app's info into the master's in-memory structures
registerApplication(app)
app.state = ApplicationState.UNKNOWN
app.driver.send(MasterChanged(self, masterWebUiUrl))
} catch {
case e: Exception => logInfo("App " + app.id + " had exception on reconnect")
}
}
/*
2. Loop over all drivers:
   - add each driver's info to the master's memory
*/
for (driver <- storedDrivers) {
// Here we just read in the list of drivers. Any drivers associated with now-lost workers
// will be re-launched when we detect that the worker is missing.
drivers += driver
}
/*
3. Loop over all workers:
   - add each worker's info to the master's memory
   - send each worker a MasterChanged message
*/
for (worker <- storedWorkers) {
logInfo("Trying to recover worker: " + worker.id)
try {
registerWorker(worker)
worker.state = WorkerState.UNKNOWN
worker.endpoint.send(MasterChanged(self, masterWebUiUrl))
} catch {
case e: Exception => logInfo("Worker " + worker.id + " had exception on reconnect")
}
}
}
3. The CompleteRecovery message
- If the master is not in RECOVERING state, return immediately (this guarantees recovery completes only once).
- Set the master's recovery state to COMPLETING_RECOVERY.
- Remove workers whose state is still UNKNOWN.
- Remove apps whose state is still UNKNOWN.
- Mark recovered apps that are WAITING as RUNNING.
- For each driver not claimed by any worker:
  - if the driver was submitted with supervise set, relaunch it
  - otherwise remove it with state ERROR
- Re-run resource scheduling.
case CompleteRecovery => completeRecovery()
private def completeRecovery() {
// Ensure "only-once" recovery semantics using a short synchronization period.
// If the master is not in RECOVERING state, return immediately
if (state != RecoveryState.RECOVERING) { return }
// Move the master into COMPLETING_RECOVERY state
state = RecoveryState.COMPLETING_RECOVERY
// Kill off any workers and apps that didn't respond to us.
// Remove workers that are still in UNKNOWN state
workers.filter(_.state == WorkerState.UNKNOWN).foreach(
removeWorker(_, "Not responding for recovery"))
// Remove apps that are still in UNKNOWN state
apps.filter(_.state == ApplicationState.UNKNOWN).foreach(finishApplication)
// Update the state of recovered apps to RUNNING
// Mark recovered WAITING apps as RUNNING
apps.filter(_.state == ApplicationState.WAITING).foreach(_.state = ApplicationState.RUNNING)
// Reschedule drivers which were not claimed by any workers
/*
1. Filter out drivers that were never assigned to a worker
2. If a driver's supervise flag is set, relaunch it
3. Otherwise remove it with state ERROR
4. Finally re-run resource scheduling
*/
drivers.filter(_.worker.isEmpty).foreach { d =>
logWarning(s"Driver ${d.id} was not found after master recovery")
if (d.desc.supervise) {
logWarning(s"Re-launching ${d.id}")
relaunchDriver(d)
} else {
removeDriver(d.id, DriverState.ERROR, None)
logWarning(s"Did not re-launch ${d.id} because it was not supervised")
}
}
state = RecoveryState.ALIVE
// schedule() is invoked whenever a new app arrives or the available resources change
schedule()
logInfo("Recovery complete - resuming operations!")
}
4. The RegisterWorker message
- If the master is in STANDBY state, send the worker a MasterInStandby message.
- If the worker's id is already registered in the master's memory, send the worker a RegisterWorkerFailed message.
- Otherwise store the worker's info in the master's in-memory structures,
- persist the worker info in the persistence engine,
- and send the worker a RegisteredWorker message.
- If storing the worker in memory fails (duplicate address), send the worker a RegisterWorkerFailed message.
case RegisterWorker(
id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress) =>
logInfo("Registering worker %s:%d with %d cores, %s RAM".format(
workerHost, workerPort, cores, Utils.megabytesToString(memory)))
// If the master is in STANDBY state, send the worker MasterInStandby
if (state == RecoveryState.STANDBY) {
workerRef.send(MasterInStandby)
// If this worker id is already registered, send the worker RegisterWorkerFailed
} else if (idToWorker.contains(id)) {
workerRef.send(RegisterWorkerFailed("Duplicate worker ID"))
} else {
val worker = new WorkerInfo(id, workerHost, workerPort, cores, memory,
workerRef, workerWebUiUrl)
// Store the worker's info in the master's in-memory structures
if (registerWorker(worker)) {
// Persist the worker info in the persistence engine
persistenceEngine.addWorker(worker)
// Send the worker a RegisteredWorker message
workerRef.send(RegisteredWorker(self, masterWebUiUrl, masterAddress))
schedule()
} else {
val workerAddress = worker.endpoint.address
logWarning("Worker registration failed. Attempted to re-register worker at same " +
"address: " + workerAddress)
// Storing the worker in memory failed; send the worker RegisterWorkerFailed
workerRef.send(RegisterWorkerFailed("Attempted to re-register worker at same address: "
+ workerAddress))
}
}
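registerWorker(worker) above is where the three worker-tracking structures from section 3 are kept in sync. A condensed sketch of what it does (modeled on the real method; the exact handling of stale entries is an assumption):
// Sketch of Master.registerWorker: keep workers / idToWorker / addressToWorker consistent.
private def registerWorkerSketch(worker: WorkerInfo): Boolean = {
  // A DEAD worker may linger at the same host/port after a restart; drop it first.
  workers.filter { w =>
    w.host == worker.host && w.port == worker.port && w.state == WorkerState.DEAD
  }.foreach(workers -= _)

  val workerAddress = worker.endpoint.address
  if (addressToWorker.contains(workerAddress)) {
    // A live worker is already registered at this address: refuse, so the caller
    // replies with RegisterWorkerFailed as seen above.
    return false
  }
  workers += worker
  idToWorker(worker.id) = worker
  addressToWorker(workerAddress) = worker
  true
}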
5. The RegisterApplication message
- If the master is in STANDBY state, the message is simply ignored.
- Build an ApplicationInfo from the submitted application description and the driver reference.
- Save the app's info in the master's memory.
- Persist the app's info.
- Send the driver a RegisteredApplication message to confirm the registration.
- Run resource scheduling.
case RegisterApplication(description, driver) =>
// TODO Prevent repeated registrations from some driver
if (state == RecoveryState.STANDBY) {
// ignore, don't send response
} else {
logInfo("Registering app " + description.name)
val app = createApplication(description, driver)
registerApplication(app)
logInfo("Registered app " + description.name + " with ID " + app.id)
persistenceEngine.addApplication(app)
driver.send(RegisteredApplication(app.id, self))
schedule()
}
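createApplication(description, driver) is a small helper: it stamps the submission time, mints an app id, and attaches the defaultCores fallback read in section 4. A sketch consistent with the call site (the "app-yyyyMMddHHmmss-seq" id format is an assumption):
import java.util.Date

// Sketch of Master.createApplication.
private def createApplicationSketch(desc: ApplicationDescription,
                                    driver: RpcEndpointRef): ApplicationInfo = {
  val now = System.currentTimeMillis()
  val date = new Date(now)
  val appId = "app-%s-%04d".format(
    new java.text.SimpleDateFormat("yyyyMMddHHmmss").format(date), nextAppNumber)
  nextAppNumber += 1
  new ApplicationInfo(now, appId, desc, date, driver, defaultCores)
}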
6. The ExecutorStateChanged message
- Send the driver an ExecutorUpdated message.
- If the executor has finished:
  - remove the executor from its worker
  - if the app has exhausted its retries, remove the app's info from the master's memory
- Re-run the master's resource scheduling.
case ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
// Look up the executor via appId and execId
val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
execOption match {
case Some(exec) =>
// The app this executor belongs to
val appInfo = idToApp(appId)
// Record the old state before applying the new one
val oldState = exec.state
exec.state = state
// A RUNNING executor must previously have been LAUNCHING; the transition resets the app's retry counter
if (state == ExecutorState.RUNNING) {
assert(oldState == ExecutorState.LAUNCHING,
s"executor $execId state transfer from $oldState to RUNNING is illegal")
appInfo.resetRetryCount()
}
// Send the driver an ExecutorUpdated message
exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus, false))
// If the executor has finished:
//   remove it from its worker,
//   and remove the app from the master's memory if its retries are exhausted
if (ExecutorState.isFinished(state)) {
// Remove this executor from the worker and app
logInfo(s"Removing executor ${exec.fullId} because it is $state")
// If an application has already finished, preserve its
// state to display its information properly on the UI
if (!appInfo.isFinished) {
appInfo.removeExecutor(exec)
}
// Remove the executor from the worker's in-memory state
exec.worker.removeExecutor(exec)
val normalExit = exitStatus == Some(0)
// Only retry certain number of times so we don't go into an infinite loop.
// Important note: this code path is not exercised by tests, so be very careful when
// changing this `if` condition.
if (!normalExit
&& appInfo.incrementRetryCount() >= MAX_EXECUTOR_RETRIES
&& MAX_EXECUTOR_RETRIES >= 0) { // < 0 disables this application-killing path
val execs = appInfo.executors.values
if (!execs.exists(_.state == ExecutorState.RUNNING)) {
logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
s"${appInfo.retryCount} times; removing it")
// No executor of this app is left RUNNING and retries are exhausted: remove the app
removeApplication(appInfo, ApplicationState.FAILED)
}
}
}
schedule()
case None =>
logWarning(s"Got status update for unknown executor $appId/$execId")
}
1 removeApplication(): remove the app's info from the master's memory
- Remove the app's info from the master's in-memory structures.
- For each executor the app holds, call killExecutor():
  - remove the executor's info from the master's memory
  - send the worker a KillExecutor message
- Send the driver an ApplicationRemoved message.
- Remove the app from the persistence engine.
- Re-run the master's resource scheduling.
def removeApplication(app: ApplicationInfo, state: ApplicationState.Value) {
if (apps.contains(app)) {
logInfo("Removing app " + app.id)
apps -= app
idToApp -= app.id
endpointToApp -= app.driver
addressToApp -= app.driver.address
// If the number of completed (historical) apps kept in memory reaches the configured
// limit (RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200)),
// drop the oldest tenth from the history and the metrics system
if (completedApps.size >= RETAINED_APPLICATIONS) { // default 200
val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1)
completedApps.take(toRemove).foreach { a =>
applicationMetricsSystem.removeSource(a.appSource)
}
completedApps.trimStart(toRemove)
}
completedApps += app // Remember it in our history
waitingApps -= app
/*
1. Remove each executor's info from the master's memory
2. Send the owning worker a KillExecutor message
*/
for (exec <- app.executors.values) {
killExecutor(exec)
}
// Mark the app with its final state (FAILED on this code path)
app.markFinished(state)
if (state != ApplicationState.FINISHED) {
// Send the driver an ApplicationRemoved message
app.driver.send(ApplicationRemoved(state.toString))
}
// Remove the app's info from the persistence engine
persistenceEngine.removeApplication(app)
// Re-run the master's resource scheduling
schedule()
// Tell all workers that the application has finished, so they can clean up any app state.
// Send every worker an ApplicationFinished message
workers.foreach { w => w.endpoint.send(ApplicationFinished(app.id))
}
}
}
2 killExecutor(): remove the executor's info from the master's memory
- Remove the executor's info from the master's memory.
- Send the worker a KillExecutor message.
private def killExecutor(exec: ExecutorDesc): Unit = {
// Remove the executor's info from the master's memory
exec.worker.removeExecutor(exec)
// Send the worker a KillExecutor message
exec.worker.endpoint.send(KillExecutor(masterUrl, exec.application.id, exec.id))
// Mark the executor's state as KILLED
exec.state = ExecutorState.KILLED
}
7. DriverStateChanged
- If the driver's state is ERROR, FINISHED, KILLED, or FAILED, remove the driver:
  - remove the driver's info from the master's memory
  - re-run the master's resource scheduling
case DriverStateChanged(driverId, state, exception) =>
state match {
case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED =>
removeDriver(driverId, state, exception)
case _ =>
throw new Exception(s"Received unexpected state update for driver $driverId: $state")
}
private def removeDriver(
driverId: String,
finalState: DriverState,
exception: Option[Exception]) {
// Look the driver up in the master's memory
drivers.find(d => d.id == driverId) match {
case Some(driver) =>
logInfo(s"Removing driver: $driverId")
drivers -= driver
// completedDrivers keeps a bounded history of finished drivers
if (completedDrivers.size >= RETAINED_DRIVERS) {
val toRemove = math.max(RETAINED_DRIVERS / 10, 1)
completedDrivers.trimStart(toRemove)
}
completedDrivers += driver
// Remove the driver's info from the persistence engine
persistenceEngine.removeDriver(driver)
driver.state = finalState
driver.exception = exception
// Detach the driver from its worker in the master's memory
driver.worker.foreach(w => w.removeDriver(driver))
// Re-run the master's resource scheduling
schedule()
case None =>
logWarning(s"Asked to remove unknown driver: $driverId")
}
}
8. Heartbeat: a worker heartbeat arrives
- If the sending worker is already registered in the master's memory, update that worker's last-heartbeat timestamp.
- If the worker is missing from idToWorker but its id still appears in the workers set, send the worker a ReconnectWorker message asking it to re-register; heartbeats from completely unknown workers are ignored.
case Heartbeat(workerId, worker) =>
idToWorker.get(workerId) match {
case Some(workerInfo) =>
workerInfo.lastHeartbeat = System.currentTimeMillis()
case None =>
if (workers.map(_.id).contains(workerId)) {
logWarning(s"Got heartbeat from unregistered worker $workerId." +
" Asking it to re-register.")
worker.send(ReconnectWorker(masterUrl))
} else {
logWarning(s"Got heartbeat from unregistered worker $workerId." +
" This worker was never registered, so ignoring the heartbeat.")
}
}
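For context on where these Heartbeat messages come from: the worker schedules them itself, at a fraction of the master-side timeout so that several beats can be missed before the master declares it dead. A sketch of the sending side (modeled on Worker; the timeout/4 interval matches my reading of the Spark source, but treat it and the surrounding names as assumptions):
import java.util.concurrent.TimeUnit

// Sketch of the worker side: send a heartbeat every quarter of the timeout window,
// so the master (see CheckForWorkerTimeOut above) misses ~4 beats before expiring us.
val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4
forwardMessageScheduler.scheduleAtFixedRate(new Runnable {
  override def run(): Unit = self.send(SendHeartbeat) // the worker endpoint then forwards
                                                      // Heartbeat(workerId, self) to the master
}, 0, HEARTBEAT_MILLIS, TimeUnit.MILLISECONDS)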
9. MasterChangeAcknowledged
- If the app's info is held in the master's memory, mark the app's state as WAITING.
case MasterChangeAcknowledged(appId) =>
// The app is known to this master: mark its state as WAITING
idToApp.get(appId) match {
case Some(app) =>
logInfo("Application has been re-registered: " + appId)
app.state = ApplicationState.WAITING
case None =>
logWarning("Master change ack from unknown app: " + appId)
}
10. WorkerSchedulerStateResponse: sent by a worker in response to MasterChanged
- Update the executor info in the master's memory.
- Update the driver info in the master's memory.
case WorkerSchedulerStateResponse(workerId, executors, driverIds) =>
// Check whether the master's memory holds this worker's info
idToWorker.get(workerId) match {
case Some(worker) =>
logInfo("Worker has been re-registered: " + workerId)
// Mark the worker's state as ALIVE
worker.state = WorkerState.ALIVE
// Keep only the executors whose app is still known to the master
val validExecutors = executors.filter(exec => idToApp.get(exec.appId).isDefined)
// Re-attach each valid executor to its app and worker
for (exec <- validExecutors) {
// The app this executor runs for
val app = idToApp.get(exec.appId).get
val execInfo = app.addExecutor(worker, exec.cores, Some(exec.execId))
worker.addExecutor(execInfo)
execInfo.copyState(exec)
}
// Update the master's driver info
for (driverId <- driverIds) {
drivers.find(_.id == driverId).foreach { driver =>
driver.worker = Some(worker)
driver.state = DriverState.RUNNING
worker.addDriver(driver)
}
}
case None =>
logWarning("Scheduler state from unknown worker: " + workerId)
}
11. WorkerLatestState: the worker's latest state
- For each executor the worker reports, if it cannot be found in the master's memory, send the worker a KillExecutor message.
- For each driver the worker reports, if it cannot be found in the master's memory, send the worker a KillDriver message.
case WorkerLatestState(workerId, executors, driverIds) =>
idToWorker.get(workerId) match {
case Some(worker) =>
for (exec <- executors) {
val executorMatches = worker.executors.exists {
case (_, e) => e.application.id == exec.appId && e.id == exec.execId
}
if (!executorMatches) {
// master doesn't recognize this executor. So just tell worker to kill it.
worker.endpoint.send(KillExecutor(masterUrl, exec.appId, exec.execId))
}
}
for (driverId <- driverIds) {
val driverMatches = worker.drivers.exists { case (id, _) => id == driverId }
if (!driverMatches) {
// master doesn't recognize this driver. So just tell worker to kill it.
worker.endpoint.send(KillDriver(driverId))
}
}
case None =>
logWarning("Worker state from unknown worker: " + workerId)
}
12. UnregisterApplication: unregister an app
- Remove the app and driver info from the master's memory.
- killExecutor(): remove the executor info from the master's memory and send the worker a KillExecutor message.
- Mark the app FINISHED (the ApplicationRemoved message is only sent to the driver for non-FINISHED final states).
- Remove the app's info from the persistence engine.
- Re-run the master's resource scheduling.
case UnregisterApplication(applicationId) =>
logInfo(s"Received unregister request from application $applicationId")
idToApp.get(applicationId).foreach(finishApplication)
private def finishApplication(app: ApplicationInfo) {
removeApplication(app, ApplicationState.FINISHED)
}
removeApplication() is the same function already analyzed under ExecutorStateChanged above; the only difference is that it is invoked here with ApplicationState.FINISHED, so the ApplicationRemoved message to the driver is skipped.
13. RequestSubmitDriver: request to submit a driver
- If the master's state is not ALIVE, reply with a failed SubmitDriverResponse.
- Otherwise persist the driver's info in the persistence engine,
- save the driver's info in the master's memory,
- run resource scheduling,
- and reply with a successful SubmitDriverResponse.
case RequestSubmitDriver(description) =>
if (state != RecoveryState.ALIVE) {
val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
"Can only accept driver submissions in ALIVE state."
// the second argument (false) signals failure
context.reply(SubmitDriverResponse(self, false, None, msg))
} else {
logInfo("Driver submitted " + description.command.mainClass)
val driver = createDriver(description)
persistenceEngine.addDriver(driver)
waitingDrivers += driver
drivers.add(driver)
schedule()
// TODO: It might be good to instead have the submission client poll the master to determine
// the current status of the driver. For now it's simply "fire and forget".
context.reply(SubmitDriverResponse(self, true, Some(driver.id),
s"Driver successfully submitted as ${driver.id}"))
}
14. RequestKillDriver: request to kill a driver
- If the master's state is not ALIVE, reply with a failed KillDriverResponse.
- If the driver cannot be found in the master's memory, reply with a failed KillDriverResponse.
- If the driver is still in the master's waiting queue, send self a DriverStateChanged(KILLED) message.
- Otherwise send a KillDriver message to the worker the driver was assigned to; in both cases reply with a successful KillDriverResponse.
case RequestKillDriver(driverId) =>
if (state != RecoveryState.ALIVE) {
val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
s"Can only kill drivers in ALIVE state."
context.reply(KillDriverResponse(self, driverId, success = false, msg))
} else {
logInfo("Asked to kill driver " + driverId)
val driver = drivers.find(_.id == driverId)
driver match {
case Some(d) =>
if (waitingDrivers.contains(d)) {
waitingDrivers -= d
self.send(DriverStateChanged(driverId, DriverState.KILLED, None))
} else {
// We just notify the worker to kill the driver here. The final bookkeeping occurs
// on the return path when the worker submits a state change back to the master
// to notify it that the driver was successfully killed.
d.worker.foreach { w =>
w.endpoint.send(KillDriver(driverId))
}
}
// TODO: It would be nice for this to be a synchronous response
val msg = s"Kill request for $driverId submitted"
logInfo(msg)
context.reply(KillDriverResponse(self, driverId, success = true, msg))
case None =>
val msg = s"Driver $driverId has already finished or does not exist"
logWarning(msg)
context.reply(KillDriverResponse(self, driverId, success = false, msg))
}
}
15. RequestDriverStatus: query a driver's status
- If the master is not ALIVE, reply with DriverStatusResponse(found = false) carrying an exception.
- If the driver is found among the running or completed drivers, reply with DriverStatusResponse(found = true) and the driver's info.
- Otherwise reply with DriverStatusResponse(found = false).
case RequestDriverStatus(driverId) =>
if (state != RecoveryState.ALIVE) {
val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +
"Can only request driver status in ALIVE state."
context.reply(
DriverStatusResponse(found = false, None, None, None, Some(new Exception(msg))))
} else {
(drivers ++ completedDrivers).find(_.id == driverId) match {
case Some(driver) =>
context.reply(DriverStatusResponse(found = true, Some(driver.state),
driver.worker.map(_.id), driver.worker.map(_.hostPort), driver.exception))
case None =>
context.reply(DriverStatusResponse(found = false, None, None, None, None))
}
}
16. RequestMasterState: query the master's state
- Reply with a MasterStateResponse message carrying the workers, apps, and drivers information.
case RequestMasterState =>
context.reply(MasterStateResponse(
address.host, address.port, restServerBoundPort,
workers.toArray, apps.toArray, completedApps.toArray,
drivers.toArray, completedDrivers.toArray, state))
17. RequestExecutors: request a change in the executor count
- Record the app's new executor limit in the master.
- Re-run the master's resource scheduling.
case RequestExecutors(appId, requestedTotal) =>
context.reply(handleRequestExecutors(appId, requestedTotal))
private def handleRequestExecutors(appId: String, requestedTotal: Int): Boolean = {
idToApp.get(appId) match {
case Some(appInfo) =>
logInfo(s"Application $appId requested to set total executors to $requestedTotal.")
// Record the app's new target executor count
appInfo.executorLimit = requestedTotal
schedule()
true
case None =>
logWarning(s"Unknown application $appId requested $requestedTotal total executors.")
false
}
}
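The reply is a plain Boolean, so the client side (in Spark, StandaloneAppClient acting on behalf of dynamic allocation) can drive this with a blocking ask. A minimal sketch (method name and wiring are assumptions):
import org.apache.spark.rpc.RpcEndpointRef

// Sketch: ask the master to set an app's total executor count; the Boolean reply
// is the return value of handleRequestExecutors above.
def requestTotalExecutorsSketch(master: RpcEndpointRef, appId: String, total: Int): Boolean =
  master.askSync[Boolean](RequestExecutors(appId, total))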
18. KillExecutors: kill executors
- If the app is known to the master, split the requested ids into known and unknown executors, and remove each known executor from the app's bookkeeping.
- Remove each executor's info from the master's memory.
- Send the owning worker a KillExecutor message.
- Re-run the master's resource scheduling.
case KillExecutors(appId, executorIds) =>
val formattedExecutorIds = formatExecutorIds(executorIds)
context.reply(handleKillExecutors(appId, formattedExecutorIds))
private def handleKillExecutors(appId: String, executorIds: Seq[Int]): Boolean = {
idToApp.get(appId) match {
case Some(appInfo) =>
logInfo(s"Application $appId requests to kill executors: " + executorIds.mkString(", "))
val (known, unknown) = executorIds.partition(appInfo.executors.contains)
known.foreach { executorId =>
val desc = appInfo.executors(executorId)
appInfo.removeExecutor(desc)
killExecutor(desc)
}
if (unknown.nonEmpty) {
logWarning(s"Application $appId attempted to kill non-existent executors: "
+ unknown.mkString(", "))
}
schedule()
true
case None =>
logWarning(s"Unregistered application $appId requested us to kill executors!")
false
}
}
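formatExecutorIds (called before handleKillExecutors above) only converts the incoming String ids to Int, dropping anything non-numeric. A sketch consistent with the call site (logWarning comes from the Logging trait mixed into Master):
// Sketch of Master.formatExecutorIds: executor ids arrive as Strings but are
// tracked as Ints; non-numeric ids are logged and ignored.
private def formatExecutorIdsSketch(executorIds: Seq[String]): Seq[Int] = {
  executorIds.flatMap { executorId =>
    try {
      Some(executorId.toInt)
    } catch {
      case _: NumberFormatException =>
        logWarning(s"Encountered executor with a non-integer ID: $executorId. Ignoring")
        None
    }
  }
}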