MR source code analysis

Detailed MapReduce Shuffle: https://blog.csdn.net/zhongqi2513/article/details/78321664

// 入口：运行wordcount例子。 job的提交
job.waitForCompletion -> submit
 -> connect(连接yarn集群); 
 ->submitter.submitJobInternal ->  submitClient.submitJob
  ->  resMgrDelegate.submitApplication -> rmClient.submitApplication
    -> ApplicationClientProtocolPBClientImpl.submitApplication
      -> proxy.submitApplication 提交到ResourceManager

ResourceManager 发送指令到NodeManager上启动MRAppMaster和YarnChild
   MRAppMaster.main -> initAndStartAppMaster
   YarnChild.main -> taskFinal.run(调用给一个 Task 执行（MapTask， ReduceTask）)

-> MapTask.run -> runNewMapper(){
    
    
// 初始化环形缓冲区
output = new NewOutputCollector(taskContext, job, umbilical, reporter);
// 这个mapper实例， 就是你在写程序的时候，定义的那个 Mapper 类的实例对象！
mapper.run(mapperContext);->{
    
    
  setup(context);
  try {
  while (context.nextKeyValue()) {
    
    
                
         // 调用业务逻辑处理一个key-value
           map(context.getCurrentKey(), context.getCurrentValue(), context);
       }
   } finally {
    
    
          // 一般用来做收尾操作， 默认实现中，什么都没做
          cleanup(context);
      }
}
}

1、context.nextKeyValue() -> mapContext.nextKeyValue(); -> reader.nextKeyValue()
-> LineRecordReader.nextKeyValue(读取数据，并更新偏移量，下一次来读下一行)
2、map方法处理完后 -> context.write((KEYOUT) key, (VALUEOUT) value);-> mapContext.write(key, value);
   TaskInputOutputContextImpl.write -> output.write(key, value); 
    -> NewOutputCollector.write(){
    
    
    // 调用分区组件，给mapTask输出的key-value打上分区标记
     int partition_no = partitioner.getPartition(key, value, partitions);
     // 写入环形缓冲区
     collector.collect(key, value, partition_no); -> MapOutputBuffer.collect
    }
    当环形缓冲区的使用量达到80%（mapreduce.map.sort.spill.percent，默认0.80）时，开始溢写磁盘
    SpillThread spill线程 run -> sortAndSpill
    -> sorter.sort(MapOutputBuffer.this, mstart, mend, reporter); 快排
      //  遍历每个分区，写出真实数据，同时记录索引！
       for (int i = 0; i < partitions; ++i) {
    
    
          writer = new Writer<K, V>(job, partitionOut, keyClass, valClass, codec, spilledRecordsCounter);
          if (combinerRunner == null) {
    
    
            // 如果没有设置 combiner 直接输出 这排序好分区数据即可
            writer.append(key, value);
          }else{
    
    
            // 执行combiner 操作
            combinerRunner.combine(kvIter, combineCollector);
            -> CombineCollector.collect()->  writer.append(key, value);  写出
          }
          
       }
    
    spill后执行merge合并小文件
    mapper.run(mapperContext); 后面 output.close(mapperContext); -> collector.flush();
     -> mergeParts(){
    
    
      // 执行文件合并
      Merger.merge
      // 生成一个索引文件存放在磁盘！
      spillRec.writeToFile(finalIndexFile, job);
     }
     
   整体流程 partitioner -> kvbuffer -> sort -> combine -> spill -> merge 详见文首的 Shuffle 详解链接

ReduceTask

ReduceTask.run(){
    
    
// 拉取数据和合并操作
rIter = shuffleConsumerPlugin.run(); -> fetchers[i].start(); -> Fetcher.run
  -> copyFromHost 
  -> Shuffle.run: kvIter = merger.close();  合并操作 -> finalMerge(){
    
    
      Merger.merge
      Merger.writeFile // 合并的结果执行写出到磁盘文件
  }
}

runNewReducer ->  reducer.run(reducerContext){
    
    
	setup(context);
	while (context.nextKey()) {
    
    
	   reduce(context.getCurrentKey(), context.getValues(), context);
	}
}

// 读取key相同的一组数据,从上一步骤的拉取数据和合并成为的最终大磁盘文件中扫描得到
context.nextKey()  -> ReduceContextImpl.nextKey()
    /**
     * Advances the reduce input to the start of the next key group.
     *
     * <p>Notes carried over from the original walkthrough:
     * <ol>
     *   <li>{@code hasMore} indicates whether any further data remains.</li>
     *   <li>{@code nextKeyIsSame} tells whether the record that follows the one
     *       just read carries the same key. {@code nextKeyValue()} reads one
     *       key-value pair per call; the pairs already read for the current
     *       group share a key, but the next pair may not.</li>
     *   <li>According to the walkthrough, all input has been sorted and merged
     *       into a single file, so one sequential scan yields each run of
     *       records with an identical key. Scanning of a group stops when
     *       there is no next record ({@code hasMore == false}) or the next
     *       record has a different key ({@code nextKeyIsSame == false}).</li>
     *   <li>According to the walkthrough, {@code nextKeyValue()} performs the
     *       key comparison while reading and stores same-key records in an
     *       intermediate container.</li>
     * </ol>
     *
     * @return {@code false} when no input remains; otherwise whatever
     *         {@code nextKeyValue()} returns for the first record of the new group
     * @throws IOException propagated from {@code nextKeyValue()}
     * @throws InterruptedException propagated from {@code nextKeyValue()}
     */
    public boolean nextKey() throws IOException, InterruptedException {
        // Consume whatever is left of the current key group so the cursor
        // ends up on the boundary of the next group.
        while (hasMore && nextKeyIsSame) {
            nextKeyValue();
        }

        // Nothing left to read: there is no further key group.
        if (!hasMore) {
            return false;
        }

        // A new key group begins here; count it when a counter is configured.
        if (inputKeyCounter != null) {
            inputKeyCounter.increment(1);
        }

        // Read the first record of the new group; its result is the answer.
        return nextKeyValue();
    }
    
//context.getValues() 读取中间容器中的数据：读取到这个容器的迭代器对象
reduce(context.getCurrentKey(), context.getValues(), context);
-> context.write -> NewTrackingRecordWriter.write -> LineRecordWriter.write

copy -> sort -> merge -> spill -> merge -> reduce -> write

MR source code analysis

Guess you like