Spark runs in a distributed fashion: each Task ultimately executes on a different machine node. A map task writes its output to the storage system of the machine it runs on, while the reduce task will most likely not run on the same machine, so it has to download the map task's intermediate output remotely. For that reason the storage system also includes a ShuffleClient.
shuffleClient is initialized in BlockManager's initialize method (line 176):
def initialize(appId: String): Unit = {
  blockTransferService.init(this)
  shuffleClient.init(appId)
  // ... rest of initialize (block manager id, master registration) elided
}
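Which client this is depends on configuration. A paraphrased sketch of the field definition in BlockManager (Spark 1.6/2.0-era source; the exact wiring varies slightly across versions): when the external shuffle service is enabled, an ExternalShuffleClient is used, otherwise the BlockTransferService itself doubles as the ShuffleClient.

// Sketch, paraphrasing BlockManager: the shuffle client falls back to the
// block transfer service unless spark.shuffle.service.enabled is true.
private[spark] val shuffleClient = if (externalShuffleServiceEnabled) {
  val transConf = SparkTransportConf.fromSparkConf(conf, "shuffle", numUsableCores)
  new ExternalShuffleClient(transConf, securityManager,
    securityManager.isAuthenticationEnabled(), securityManager.isSaslEncryptionEnabled())
} else {
  blockTransferService
}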
The default BlockTransferService implementation is NettyBlockTransferService; its init method (line 53) performs four steps:
Create the RpcServer (NettyBlockRpcServer)
Construct the TransportContext
Create the RPC client factory TransportClientFactory
Create the Netty server TransportServer
override def init(blockDataManager: BlockDataManager): Unit = {
  val rpcHandler = new NettyBlockRpcServer(conf.getAppId, serializer, blockDataManager)
  var serverBootstrap: Option[TransportServerBootstrap] = None
  var clientBootstrap: Option[TransportClientBootstrap] = None
  if (authEnabled) {
    serverBootstrap = Some(new SaslServerBootstrap(transportConf, securityManager))
    clientBootstrap = Some(new SaslClientBootstrap(transportConf, conf.getAppId, securityManager,
      securityManager.isSaslEncryptionEnabled()))
  }
  transportContext = new TransportContext(transportConf, rpcHandler)
  clientFactory = transportContext.createClientFactory(clientBootstrap.toSeq.asJava)
  server = createServer(serverBootstrap.toList)
  appId = conf.getAppId
  logInfo("Server created on " + server.getPort)
}
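The authEnabled branch above installs SASL bootstraps on both ends; the flag comes from SecurityManager, which reads spark.authenticate. A minimal configuration sketch (assumption: a non-YARN deployment, where the shared secret is set explicitly):

import org.apache.spark.SparkConf

// With spark.authenticate set, SecurityManager reports authentication as enabled
// and the Sasl{Server,Client}Bootstrap pair above is wired in.
val conf = new SparkConf()
  .set("spark.authenticate", "true")           // enable SASL authentication
  .set("spark.authenticate.secret", "secret")  // shared secret, must match on all nodes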
First, the RpcServer. NettyBlockRpcServer's receive method (line 48) shows that this RPC handler serves both block downloads and block uploads:
override def receive(
    client: TransportClient,
    rpcMessage: ByteBuffer,
    responseContext: RpcResponseCallback): Unit = {
  val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
  logTrace(s"Received request: $message")
  message match {
    // Download: open the requested blocks and register them as a stream
    case openBlocks: OpenBlocks =>
      val blocks: Seq[ManagedBuffer] =
        openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
      val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
      logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
      responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)

    // Upload: deserialize the storage level and write the block locally
    case uploadBlock: UploadBlock =>
      // StorageLevel is serialized as bytes using our JavaSerializer.
      val level: StorageLevel =
        serializer.newInstance().deserialize(ByteBuffer.wrap(uploadBlock.metadata))
      val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
      blockManager.putBlockData(BlockId(uploadBlock.blockId), data, level)
      responseContext.onSuccess(ByteBuffer.allocate(0))
  }
}
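The StreamHandle returned for OpenBlocks is what drives the actual download: it names a streamId and a chunk count, and the client then fetches each block as one chunk of that stream. A rough caller-side sketch of this one-for-one protocol (this is essentially what OneForOneBlockFetcher does internally; client, streamId, numChunks and chunkCallback are placeholders here):

// After the OpenBlocks RPC, the server replies with StreamHandle(streamId, numChunks);
// chunk i of that stream corresponds to the i-th requested block id.
val handle = new StreamHandle(streamId, numChunks)  // decoded from the RPC response
for (i <- 0 until handle.numChunks) {
  client.fetchChunk(handle.streamId, i, chunkCallback)  // ChunkReceivedCallback
}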
NettyBlockTransferService creates the TransportContext at line 62:
transportContext = new TransportContext(transportConf, rpcHandler)
TransportContext (line 71) can create both the Netty server and the Netty client. It is composed of:
TransportConf: controls, among other things, the number of client and server threads for the shuffle I/O provided by the Netty framework.
RpcHandler: on the shuffle I/O server side, handles block upload and download after an RPC request arrives from a client; here it is NettyBlockRpcServer.
decoder (MessageDecoder): used by the shuffle I/O server to decode the ByteBuf sent by clients.
encoder (MessageEncoder): used by the shuffle I/O client to encode outgoing messages.
(The two-argument constructor used above delegates to the one below with closeIdleConnections = false.)
public TransportContext(
    TransportConf conf,
    RpcHandler rpcHandler,
    boolean closeIdleConnections) {
  this.conf = conf;
  this.rpcHandler = rpcHandler;
  this.encoder = new MessageEncoder();
  this.decoder = new MessageDecoder();
  this.closeIdleConnections = closeIdleConnections;
}
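Taken together, one TransportContext mints both endpoints. A condensed usage sketch (createServer, createClientFactory and createClient are the real TransportContext/TransportClientFactory API; the port, bootstrap lists and remote address are placeholders):

// One context, both directions: a Netty server for incoming block requests
// and a client factory for outgoing ones.
val context = new TransportContext(transportConf, rpcHandler)      // e.g. NettyBlockRpcServer
val server  = context.createServer(port, serverBootstraps.asJava)
val factory = context.createClientFactory(clientBootstraps.asJava)
val client  = factory.createClient(remoteHost, remotePort)         // TransportClient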
NettyBlockTransferService creates the RPC client factory at line 63. The factory produces TransportClient instances, which send RPC requests to the Netty server:
clientFactory = transportContext.createClientFactory(clientBootstrap.toSeq.asJava)
Line 64 creates the Netty server. TransportServer is the Netty-based server endpoint that serves the upload and download RPCs:
server = createServer(serverBootstrap.toList)
Fetching remote shuffle blocks
NettyBlockTransferService, line 80: blocks are fetched by creating a TransportClient that connects to the remote Netty server.
override def fetchBlocks(
    host: String,
    port: Int,
    execId: String,
    blockIds: Array[String],
    listener: BlockFetchingListener): Unit = {
  logTrace(s"Fetch blocks from $host:$port (executor id $execId)")
  try {
    val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter {
      override def createAndStart(blockIds: Array[String], listener: BlockFetchingListener) {
        // Create a client connected to the remote Netty server
        val client = clientFactory.createClient(host, port)
        // Fetch the blocks one-for-one over that connection
        new OneForOneBlockFetcher(client, appId, execId, blockIds.toArray, listener).start()
      }
    }

    val maxRetries = transportConf.maxIORetries()
    if (maxRetries > 0) {
      // Note this Fetcher will correctly handle maxRetries == 0; we avoid it just in case there's
      // a bug in this code. We should remove the if statement once we're sure of the stability.
      new RetryingBlockFetcher(transportConf, blockFetchStarter, blockIds, listener).start()
    } else {
      // Invoke createAndStart defined above directly, without retries
      blockFetchStarter.createAndStart(blockIds, listener)
    }
  } catch {
    case e: Exception =>
      logError("Exception while beginning fetchBlocks", e)
      blockIds.foreach(listener.onBlockFetchFailure(_, e))
  }
}
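A minimal caller-side sketch of fetchBlocks, assuming a NettyBlockTransferService instance named transferService (in Spark itself this call is issued from the shuffle read path, e.g. ShuffleBlockFetcherIterator; the host, port, executor id and block id below are placeholders):

import org.apache.spark.network.buffer.ManagedBuffer
import org.apache.spark.network.shuffle.BlockFetchingListener

// Illustrative listener: real callers hand the buffers on to the shuffle reader.
val listener = new BlockFetchingListener {
  override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit =
    println(s"fetched $blockId (${data.size()} bytes)")
  override def onBlockFetchFailure(blockId: String, e: Throwable): Unit =
    println(s"failed to fetch $blockId: $e")
}
transferService.fetchBlocks("remote-host", 7337, "exec-1", Array("shuffle_0_0_0"), listener)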
Uploading shuffle blocks
NettyBlockTransferService, line 114. The upload proceeds as follows (a caller-side sketch follows the method):
Create the Netty client.
Serialize the block's StorageLevel.
Convert the block's ByteBuffer into a byte array so it can be serialized.
Wrap all of this in an UploadBlock message and serialize it.
Upload it via the client's sendRpc method.
override def uploadBlock(
    hostname: String,
    port: Int,
    execId: String,
    blockId: BlockId,
    blockData: ManagedBuffer,
    level: StorageLevel): Future[Unit] = {
  val result = Promise[Unit]()
  val client = clientFactory.createClient(hostname, port)

  // StorageLevel is serialized as bytes using our JavaSerializer. Everything else is encoded
  // using our binary protocol.
  val levelBytes = serializer.newInstance().serialize(level).array()

  // Convert or copy nio buffer into array in order to serialize it.
  val nioBuffer = blockData.nioByteBuffer()
  val array = if (nioBuffer.hasArray) {
    nioBuffer.array()
  } else {
    val data = new Array[Byte](nioBuffer.remaining())
    nioBuffer.get(data)
    data
  }

  client.sendRpc(new UploadBlock(appId, execId, blockId.toString, levelBytes, array).toByteBuffer,
    new RpcResponseCallback {
      override def onSuccess(response: ByteBuffer): Unit = {
        logTrace(s"Successfully uploaded block $blockId")
        result.success((): Unit)
      }
      override def onFailure(e: Throwable): Unit = {
        logError(s"Error while uploading block $blockId", e)
        result.failure(e)
      }
    })

  result.future
}
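Callers treat the returned Future[Unit] purely as a completion signal; the BlockManager replication path, for example, waits on it. A hedged usage sketch (transferService, blockBuffer and the peer address are placeholders):

import scala.concurrent.Await
import scala.concurrent.duration._
import org.apache.spark.storage.{BlockId, StorageLevel}

// Push one block to a peer and block until the server acks with the empty
// response sent by NettyBlockRpcServer's UploadBlock branch.
val future = transferService.uploadBlock(
  "peer-host", 7337, "exec-2",
  BlockId("rdd_0_0"), blockBuffer, StorageLevel.MEMORY_AND_DISK)
Await.result(future, 30.seconds)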