Master
org.apache.spark.deploy.master.Master
main -> startSystemAndActor
-> val actor = actorSystem.actorOf(
Props(classOf[Master], host, boundPort, webUiPort, securityMgr, conf), actorName)
creates the Master actor
-> Master's constructor and preStart() run
-> preStart()
-> context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut)
-> periodically sends itself CheckForWorkerTimeOut messages to monitor worker liveness
-> receiveWithLogging -> case CheckForWorkerTimeOut -> timeOutDeadWorkers removes timed-out workers
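This scheduled self-message is plain classic Akka (the actor library Spark 1.x is built on). A minimal runnable sketch of the pattern, assuming akka-actor on the classpath; the timeout bookkeeping and all names except CheckForWorkerTimeOut are illustrative, not Spark's actual implementation:

import akka.actor.{Actor, ActorSystem, Props}
import scala.concurrent.duration._

case object CheckForWorkerTimeOut

class TimeoutMaster(workerTimeoutMs: Long) extends Actor {
  import context.dispatcher // ExecutionContext required by the scheduler

  // last heartbeat time per worker id (a stand-in for the Master's worker set)
  private var lastHeartbeat = Map.empty[String, Long]

  // like Master.preStart: periodically send CheckForWorkerTimeOut to self
  override def preStart(): Unit =
    context.system.scheduler.schedule(0.millis, workerTimeoutMs.millis, self, CheckForWorkerTimeOut)

  def receive = {
    case CheckForWorkerTimeOut => // like timeOutDeadWorkers
      val deadline = System.currentTimeMillis() - workerTimeoutMs
      val (dead, alive) = lastHeartbeat.partition { case (_, t) => t < deadline }
      dead.keys.foreach(id => println(s"removing timed-out worker $id"))
      lastHeartbeat = alive
  }
}

object TimeoutMasterDemo extends App {
  ActorSystem("demo").actorOf(Props(new TimeoutMaster(60 * 1000)), "Master")
}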
Worker
org.apache.spark.deploy.worker.Worker
main -> startSystemAndActor
-> actorSystem.actorOf(Props(classOf[Worker], host, boundPort, webUiPort, cores, memory,
masterAkkaUrls, systemName, actorName, workDir, conf, securityMgr), name = actorName)
creates the Worker actor
-> Worker's constructor and preStart() run -> registerWithMaster() -> tryRegisterAllMasters
-> actor ! RegisterWorker (sends the registration message to the Master)
-> Master.receiveWithLogging (case RegisterWorker) -> sender ! RegisteredWorker (replies to the Worker that registration succeeded)
-> Worker.receiveWithLogging (case RegisteredWorker)
// schedule periodic heartbeats
-> context.system.scheduler.schedule(0 millis, HEARTBEAT_MILLIS millis, self, SendHeartbeat)
case SendHeartbeat =>
if (connected) {
master ! Heartbeat(workerId) }
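Taken together, registration plus heartbeats form a small request-reply protocol between Worker and Master. A hedged sketch under the same classic-Akka assumption; the message classes mirror the ones above, but the 15-second interval and the rest of the bookkeeping are illustrative:

import akka.actor.{Actor, ActorRef, ActorSystem, Props}
import scala.concurrent.duration._

case class RegisterWorker(workerId: String)
case object RegisteredWorker
case object SendHeartbeat
case class Heartbeat(workerId: String)

class DemoWorker(master: ActorRef, workerId: String) extends Actor {
  import context.dispatcher
  private var connected = false

  // like Worker.preStart -> registerWithMaster(): send the registration message
  override def preStart(): Unit = master ! RegisterWorker(workerId)

  def receive = {
    case RegisteredWorker =>
      connected = true
      // once registered, heartbeat on a fixed interval via self-messages
      context.system.scheduler.schedule(0.millis, 15.seconds, self, SendHeartbeat)
    case SendHeartbeat =>
      if (connected) master ! Heartbeat(workerId)
  }
}

class DemoMaster extends Actor {
  def receive = {
    case RegisterWorker(id) =>
      println(s"registered worker $id")
      sender() ! RegisteredWorker // reply to the registering worker
    case Heartbeat(id) =>
      println(s"heartbeat from $id")
  }
}

object RegistrationDemo extends App {
  val system = ActorSystem("demo")
  val master = system.actorOf(Props[DemoMaster], "Master")
  system.actorOf(Props(new DemoWorker(master, "worker-1")), "Worker")
}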
Spark task submission process
org.apache.spark.deploy.SparkSubmit
main -> submit -> doRunMain -> runMain
-> val mainMethod = mainClass.getMethod("main", new Array[String](0).getClass)
-> mainMethod.invoke(null, childArgs.toArray)
invokes the user code via reflection
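The reflective call is ordinary JVM reflection. A minimal sketch, assuming the user's class (here the WordCount object defined just below) is on the classpath; the argument handling is illustrative, not SparkSubmit's real childArgs logic:

object ReflectiveMainDemo {
  def main(args: Array[String]): Unit = {
    val mainClass = Class.forName("WordCount") // the user's --class
    // classOf[Array[String]] is the same Class as new Array[String](0).getClass in the source
    val mainMethod = mainClass.getMethod("main", classOf[Array[String]])
    // main is static, so the receiver is null; SparkSubmit passes childArgs.toArray here
    mainMethod.invoke(null, Array[String]())
  }
}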
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
// Transformations return RDDs and are lazy; actions trigger the job and return a result to the driver
// (or Unit). Unlike groupByKey, reduceByKey does map-side combine (pre-aggregation) before the shuffle.
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("wc").setMaster("spark://DESKTOP-B8SSFJJ:7077")
// the SparkContext instance is created inside SparkSubmit (executed on the driver side)
val sc = new SparkContext(conf)
}
}
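For reference, a variant of the WordCount above that runs without a standalone cluster; local[*] and the sample data are placeholders. It also demonstrates the reduceByKey pre-aggregation mentioned in the comment:

import org.apache.spark.{SparkConf, SparkContext}

object LocalWordCount {
  def main(args: Array[String]): Unit = {
    // local[*] lets the sketch run inside a single JVM, no Master/Worker needed
    val conf = new SparkConf().setAppName("wc").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val counts = sc.parallelize(Seq("a b a", "b c"))
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _) // combines within each partition before the shuffle
    counts.collect().foreach(println) // (a,2) (b,2) (c,1)
    sc.stop()
  }
}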
new SparkContext(conf) ->
* the entry point for submitting a Spark application to the cluster
* primary constructor:
* 1. calls createSparkEnv to create the SparkEnv, inside which the important ActorSystem is created
* createSparkEnv -> SparkEnv.createDriverEnv -> create -> AkkaUtils.createActorSystem -> doCreateActorSystem
* -> val actorSystem = ActorSystem(name, akkaConf)
* 2. creates the TaskScheduler by matching on the master URL of the submitted application
* SparkContext.createTaskScheduler -> val scheduler = new TaskSchedulerImpl(sc)
* -> val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls)
* SparkDeploySchedulerBackend holds an AppClient, and the AppClient holds the ClientActor;
* CoarseGrainedSchedulerBackend holds the DriverActor
* inheritance: SparkDeploySchedulerBackend extends CoarseGrainedSchedulerBackend extends SchedulerBackend
* 3. creates the DAGScheduler
* dagScheduler = new DAGScheduler(this)
* 4. starts the TaskScheduler
* taskScheduler.start()
Sequence of what happens when the SparkContext starts the taskScheduler:
when taskScheduler starts, it creates the DriverActor and the ClientActor.
The DriverActor communicates with Executors and submits tasks to them; the ClientActor communicates
with the Master and sends it the request to register the application.
taskScheduler.start() -> TaskSchedulerImpl.start -> SparkDeploySchedulerBackend.start
-> super.start() -> CoarseGrainedSchedulerBackend.start()
// TODO create the DriverActor through the actorSystem
driverActor = actorSystem.actorOf(
// TODO invokes the DriverActor constructor and its preStart method
Props(new DriverActor(properties)), name = CoarseGrainedSchedulerBackend.ACTOR_NAME)
DriverActor -> preStart
// TODO periodically send ReviveOffers to self to check whether there are tasks to submit
context.system.scheduler.schedule(0.millis, reviveInterval.millis, self, ReviveOffers)
-> SparkDeploySchedulerBackend.start(){
super.start()
/**
 * TODO important: this parameter names the implementation class of the future Executor,
 * org.apache.spark.executor.CoarseGrainedExecutorBackend, which is started on the Worker
 */
val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend",
// details of the currently running application,
// including the maximum number of CPU cores it needs and the memory required per slave
// TODO the parameters are wrapped into an ApplicationDescription
val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory, command,
appUIAddress, sc.eventLogDir, sc.eventLogCodec)
// TODO create the AppClient, passing the ApplicationDescription in through its primary constructor
client = new AppClient(sc.env.actorSystem, masters, appDesc, this, conf)
// TODO this method creates the ClientActor used to communicate with the Master
client.start()
-> actor = actorSystem.actorOf(Props(new ClientActor))
-> ClientActor.preStart -> registerWithMaster() -> actor ! RegisterApplication(appDescription)
Master.RegisterApplication => {
// register the application
registerApplication(app)
// the Master replies to the ClientActor that registration succeeded
sender ! RegisteredApplication(app.id, masterUrl)
// the Master starts scheduling resources, i.e. deciding which workers the application's tasks start on
schedule()
}
The ClientActor registers with the Master and sends it the application description; the Master then sends the application information to workers, which start the Executors (CoarseGrainedExecutorBackend)
schedule() -> the Spark Master's resource scheduling algorithm:
the Master sends a message telling each chosen worker to start an Executor
schedule() -> launchExecutor(){
/**
 * TODO the Master sends a message to the Worker, passing the parameters in a case class, telling it to start an Executor
 */
worker.actor ! LaunchExecutor(masterUrl,
exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory)
/**
 * TODO the Master tells the ClientActor that the Executor has started.
 * At line 324, val app = createApplication(description, sender), the sender is the ClientActor that sent the message,
 * so exec.application.driver is the ClientActor, not the DriverActor
 */
exec.application.driver ! ExecutorAdded(
exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
}
worker.actor ! LaunchExecutor -> Worker.LaunchExecutor
-> val manager = new ExecutorRunner; manager.start() -> fetchAndRunExecutor
-> process = builder.start() // spawns the child JVM, roughly executing "bin/java ... <mainClass>"
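builder here is a java.lang.ProcessBuilder that ExecutorRunner assembles from the Command inside the ApplicationDescription. A minimal sketch of the spawning pattern; the command line is a placeholder (the real one names CoarseGrainedExecutorBackend and its full classpath):

object LaunchJvmDemo {
  def main(args: Array[String]): Unit = {
    // ExecutorRunner builds something like: java -cp <classpath> <mainClass> <args...>
    val builder = new ProcessBuilder("java", "-version") // placeholder command
    builder.redirectErrorStream(true)
    val process = builder.start() // spawns the child JVM
    println(s"child JVM exited with ${process.waitFor()}")
  }
}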
-> org.apache.spark.executor.CoarseGrainedExecutorBackend.main -> run(){
// create an ActorSystem inside the Executor process
val (fetcher, _) = AkkaUtils.createActorSystem(
// establish the connection to the driver
val driver = fetcher.actorSelection(driverUrl)
// CoarseGrainedExecutorBackend is the actor that actually does the communication
env.actorSystem.actorOf(
Props(classOf[CoarseGrainedExecutorBackend],
driverUrl, executorId, sparkHostPort, cores, userClassPath, env),
name = "Executor")
CoarseGrainedExecutorBackend.preStart()
// the Executor sends a RegisterExecutor message to the DriverActor
-> driver ! RegisterExecutor(executorId, hostPort, cores, extractLogUrls)
DriverActor.RegisterExecutor
// the DriverActor replies to the Executor that registration succeeded
=> sender ! RegisteredExecutor;
// then checks whether there are tasks waiting to be submitted (the DriverActor offers them to executors for computation)
makeOffers()
}
CoarseGrainedExecutorBackend.RegisteredExecutor
// create an Executor instance that will run the actual business logic
=> executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false){
// create the task-launch thread pool
val threadPool = Utils.newDaemonCachedThreadPool("Executor task launch worker")
// the Executor starts sending heartbeats to the DriverActor
startDriverHeartbeater
}
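A sketch of that thread-pool setup using only java.util.concurrent; Utils.newDaemonCachedThreadPool is essentially a cached pool whose ThreadFactory marks each thread as a daemon (the task body below is illustrative):

import java.util.concurrent.{Executors, ThreadFactory}

object ExecutorPoolDemo {
  // a cached pool of daemon threads, in the spirit of Utils.newDaemonCachedThreadPool
  private val factory = new ThreadFactory {
    def newThread(r: Runnable): Thread = {
      val t = new Thread(r, "Executor task launch worker")
      t.setDaemon(true)
      t
    }
  }
  val threadPool = Executors.newCachedThreadPool(factory)

  def main(args: Array[String]): Unit = {
    // launchTask wraps each task in a Runnable (TaskRunner) and hands it to the pool
    threadPool.execute(new Runnable {
      def run(): Unit = println(s"running task on ${Thread.currentThread.getName}")
    })
    Thread.sleep(500) // give the daemon thread time to finish before the JVM exits
  }
}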
// send an ExecutorStateChanged message to the worker actor that owns this ExecutorRunner thread
-> worker ! ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode))
Worker.(case ExecutorStateChanged)
// the Worker forwards an ExecutorStateChanged message directly to the Master as well
-> master ! ExecutorStateChanged(appId, execId, state, message, exitStatus)
Execution of the user code then continues until an action operator triggers the computation:
sc.textFile("/aaa.txt")
.flatMap(_.split(" "))
.map((_,1))
.reduceByKey(_+_)
.saveAsTextFile("/wc_out") // saveAsTextFile requires an output path; "/wc_out" is a placeholder
saveAsTextFile (triggers the action) -> saveAsHadoopFile -> saveAsHadoopDataset -> self.context.runJob
-> dagScheduler.runJob() -> submitJob()
Note: when the SparkContext creates the DAGScheduler:
private[spark] class DAGScheduler(){
private[scheduler] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this)
eventProcessLoop.start() -> eventThread.start() -> EventLoop.run(){
// a blocking event queue: when an event arrives, pattern matching on its type dispatches the handling
private val eventQueue: BlockingQueue[E] = new LinkedBlockingDeque[E]()
val event = eventQueue.take()
DAGSchedulerEventProcessLoop.onReceive()
}
}
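The EventLoop pattern itself is just a blocking queue drained by one thread that dispatches on the event type. A self-contained sketch with an illustrative event class:

import java.util.concurrent.{BlockingQueue, LinkedBlockingDeque}

sealed trait DemoEvent
case class JobSubmitted(jobId: Int) extends DemoEvent

object EventLoopDemo {
  // events are posted into the blocking queue and consumed by a single thread
  private val eventQueue: BlockingQueue[DemoEvent] = new LinkedBlockingDeque[DemoEvent]()

  private val eventThread = new Thread("demo-event-loop") {
    override def run(): Unit =
      while (true) {
        val event = eventQueue.take() // blocks until an event arrives
        event match { // like onReceive: dispatch on the event type
          case JobSubmitted(id) => println(s"handling job $id")
        }
      }
  }
  eventThread.setDaemon(true)

  def post(event: DemoEvent): Unit = eventQueue.put(event)

  def main(args: Array[String]): Unit = {
    eventThread.start()
    post(JobSubmitted(0)) // like eventProcessLoop.post(JobSubmitted(...))
    Thread.sleep(500)
  }
}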
-> dagScheduler.runJob() -> DAGScheduler.submitJob(){
/**
 * first wrap the data into a JobSubmitted event, then put the event into eventProcessLoop's blocking queue.
 * EventLoop's eventThread runs its run method forever, takes JobSubmitted from the queue and executes onReceive;
 * the onReceive implementation is DAGScheduler.DAGSchedulerEventProcessLoop.onReceive
 */
eventProcessLoop.post(JobSubmitted(
jobId, rdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties))
}
1) stage division
Stages are divided from the final RDD backwards; newStage finally returns the finalStage, which links (through its parents) to all ancestor stages
=> DAGSchedulerEventProcessLoop.onReceive() -> case JobSubmitted -> dagScheduler.handleJobSubmitted(){
// divide the stages: create a Stage object and add it to the DAGScheduler's internal memory cache
finalStage = newStage(finalRDD, partitions.size, None, jobId, callSite){
// get the parent stages
val parentStages = getParentStages(rdd, jobId)
// this is the last stage
val stage = new Stage(id, rdd, numTasks, shuffleDep, parentStages, jobId, callSite)
stage // this stage is the finalStage
}
getParentStages(){
val parents = new HashSet[Stage] // holds the parent stages (one per wide/shuffle dependency)
val visited = new HashSet[RDD[_]] // RDDs already visited
val waitingForVisit = new Stack[RDD[_]] // holds the next RDD up each narrow-dependency chain
def visit(r: RDD[_]) {
// visited(r) = HashSet.apply(r) = visited.contains(r)
if (!visited(r)) {
visited += r
for (dep <- r.dependencies) {
dep match {
case shufDep: ShuffleDependency[_, _, _] =>
// TODO pass the wide dependency in to obtain the parent stage
parents += getShuffleMapStage(shufDep, jobId)
case _ =>
// narrow dependency: push the parent RDD onto waitingForVisit, to be handled in the next loop iteration
waitingForVisit.push(dep.rdd)
}
}
}
}
// push the finalRDD onto the stack
waitingForVisit.push(rdd)
/**
 * loop until waitingForVisit is empty
 */
while (!waitingForVisit.isEmpty) {
visit(waitingForVisit.pop())
}
// TODO if there are only narrow dependencies there is no parent stage, and parents is empty
parents.toList
}
parents += getShuffleMapStage(shufDep, jobId){
val stage =
// TODO create the parent stage
newOrUsedStage(shuffleDep.rdd, shuffleDep.rdd.partitions.size, shuffleDep, jobId,
shuffleDep.rdd.creationSite)
}
newOrUsedStage()
// recursively calls newStage, dividing stages from back to front, and finally returning the last stage (finalStage)
=> val stage = newStage(rdd, numTasks, Some(shuffleDep), jobId, callSite)
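The backward walk can be shown on a toy lineage model, assuming nothing from Spark: DemoRDD, NarrowDep and ShuffleDep stand in for RDD, narrow dependencies and ShuffleDependency, and the function returns the RDDs at which parent stages would be cut:

import scala.collection.mutable

case class DemoRDD(name: String, deps: List[Dep] = Nil)
sealed trait Dep { def rdd: DemoRDD }
case class NarrowDep(rdd: DemoRDD) extends Dep
case class ShuffleDep(rdd: DemoRDD) extends Dep

object StageDivisionDemo {
  // like getParentStages: walk back from the final RDD; every ShuffleDep
  // opens a parent stage, every NarrowDep stays inside the current stage
  def parentStageRdds(finalRdd: DemoRDD): List[DemoRDD] = {
    val parents = mutable.HashSet[DemoRDD]()
    val visited = mutable.HashSet[DemoRDD]()
    val waitingForVisit = mutable.Stack[DemoRDD](finalRdd)
    while (waitingForVisit.nonEmpty) {
      val r = waitingForVisit.pop()
      if (!visited(r)) {
        visited += r
        r.deps.foreach {
          case ShuffleDep(p) => parents += p // stage boundary
          case NarrowDep(p) => waitingForVisit.push(p) // same stage, keep walking back
        }
      }
    }
    parents.toList
  }

  def main(args: Array[String]): Unit = {
    // textFile -> flatMap -> map are narrow; reduceByKey introduces the shuffle
    val lines = DemoRDD("lines")
    val words = DemoRDD("words", List(NarrowDep(lines)))
    val pairs = DemoRDD("pairs", List(NarrowDep(words)))
    val counts = DemoRDD("counts", List(ShuffleDep(pairs)))
    println(parentStageRdds(counts).map(_.name)) // List(pairs): one parent stage
  }
}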
2) stage submission: recurse backwards from the finalStage to find the first stage, then submit the stages from front to back
submitStage(finalStage)
private def submitStage(stage: Stage) {
val jobId = activeJobForStage(stage)
if (jobId.isDefined) {
logDebug("submitStage(" + stage + ")")
if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) {
// TODO -> call getMissingParentStages to get this stage's parent stages
val missing = getMissingParentStages(stage).sortBy(_.id)
logDebug("missing: " + missing)
/**
 * this recurses until it reaches the earliest stage, which has no parent stage;
 * the first stage submitted is stage 0,
 * while all the other stages are sitting in waitingStages at that point
 *
 * recursion exit:
 * check whether the parent stages are empty; empty means this is the first stage,
 * and submission starts from it
 */
if (missing == Nil) {
logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")
// TODO submit the earliest stage first; submission runs from front to back
submitMissingTasks(stage, jobId.get)
} else {
/**
 * recursively call submitStage to submit the parent stages;
 * this recursion is the driver, and the essence, of the stage division algorithm
 *
 * while there are parent stages, recurse into this method
 */
for (parent <- missing) {
submitStage(parent)
}
// put the current stage into the waitingStages queue, to be executed later
waitingStages += stage
}
}
}
}
} else {
abortStage(stage, "No active job for stage " + stage.id)
}
}
}
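The recursion can be sketched on a toy Stage model. In the real DAGScheduler, "missing" means parent stages whose shuffle output is not yet available, and waiting stages are resubmitted as their parents finish; both are simplified away here:

import scala.collection.mutable

case class DemoStage(id: Int, parents: List[DemoStage])

object SubmitStageDemo {
  private val waitingStages = mutable.HashSet[DemoStage]()
  private val runningStages = mutable.HashSet[DemoStage]()

  // like DAGScheduler.submitStage: recurse into the parents first; a stage with
  // no missing parents is submitted, everything else waits in waitingStages
  def submitStage(stage: DemoStage): Unit =
    if (!waitingStages(stage) && !runningStages(stage)) {
      val missing = stage.parents.filterNot(runningStages).sortBy(_.id)
      if (missing.isEmpty) {
        println(s"submitting stage ${stage.id}") // like submitMissingTasks
        runningStages += stage
      } else {
        missing.foreach(submitStage) // recursion: parents are submitted first
        waitingStages += stage
      }
    }

  def main(args: Array[String]): Unit = {
    val stage0 = DemoStage(0, Nil)
    val stage1 = DemoStage(1, List(stage0))
    submitStage(stage1) // prints "submitting stage 0"; stage 1 stays in waitingStages
  }
}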
3) task submission
submitMissingTasks() -> taskScheduler.submitTasks -> backend.reviveOffers(){
// TODO send a message to the DriverActor
driverActor ! ReviveOffers
}
-> DriverActor(case ReviveOffers) -> makeOffers -> launchTasks()
// submit the task to the executor
executorData.executorActor ! LaunchTask
-> CoarseGrainedExecutorBackend(case LaunchTask) -> executor.launchTask(){
val tr = new TaskRunner
threadPool.execute(tr)
}-> TaskRunner.run -> task.run -> runTask (ResultTask, ShuffleMapTask)
--> ResultTask.runTask()
// invoke the function: rdd.iterator iterates the partition's data and func computes over it
-> func(context, rdd.iterator(partition, context))
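That last call is just the job's function applied to the partition's iterator. A toy sketch with Seqs standing in for partitions and _.sum standing in for the function that runJob built:

object ResultTaskDemo {
  def main(args: Array[String]): Unit = {
    // two "partitions" of data; func plays the role of the job's function
    val partitions = Seq(Seq(1, 2, 3), Seq(4, 5))
    val func: Iterator[Int] => Int = _.sum

    // each ResultTask computes func over its own partition's iterator
    val results = partitions.map(p => func(p.iterator))
    println(results) // List(6, 9)
  }
}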