AV1 code learning: the main function of the encoder application aomenc.c

In the aom 2.0 project, aomenc is the encoder application. It encodes a video sequence, and its entry point is the main function in aomenc.c.

The main flow of the main function is as follows:

  1. Initialization
  2. Parse the command line parameters (see the example invocation after this list)
    1. parse_global_config: parse the global encoder configuration parameters
    2. parse_stream_params: parse the per-stream parameters
  3. Enter the encoding pass loop
    1. open_input_file: open the input file
    2. Set up the stream and codec configuration
      1. set_stream_dimensions: set the dimensions of the video stream
      2. validate_stream_config: validate the stream configuration
      3. show_stream_config: print the stream configuration
    3. setup_pass: set the parameters for the current encoding pass
    4. initialize_encoder: initialize the encoder
    5. open_output_file: open the output file
    6. Read every frame of the video in a loop
      1. read_frame: read the current frame (only while there is no frame limit, or fewer frames than the limit have been read)
      2. encode_frame: encode the current frame (only once the frame count exceeds the number of frames to skip)
      3. update_quantizer_histogram: update the quantizer histogram
      4. get_cx_data: fetch the compressed data and write it to the output bitstream
    7. Print summary information (encoding time, PSNR, etc.)
    8. Close the input and output files
  4. Release memory
  5. Return a success or failure status
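
For reference, a typical two-pass invocation that exercises this flow looks like the following (the file names, resolution, and frame limit are placeholders chosen only for illustration):

aomenc --passes=2 --fpf=first_pass_stats.log -w 1920 -h 1080 --limit=100 --ivf -o output.ivf input.yuv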

Note:

The encoding pass loop is controlled by the global configuration parameters passes and pass: passes is the total number of encoding passes to perform (1 or 2), and pass is the index of the pass currently being executed (1 or 2). The AV1 encoding passes are classified as follows:

enum aom_enc_pass {
  AOM_RC_ONE_PASS,   /**< Single pass mode */
  AOM_RC_FIRST_PASS, /**< First pass of multi-pass mode */
  AOM_RC_LAST_PASS   /**< Final pass of multi-pass mode */
};

By default, aomenc encodes AV1 in two passes; the first pass mainly collects statistics that the second pass then uses to make better encoding decisions.
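
As a rough illustration of how these values are used, the following is a minimal sketch (not the actual setup_pass code, which also manages the first-pass statistics file) of how the pass index and the total number of passes map onto the enum above:

/* Sketch only: pick the aom_enc_pass value for the pass currently running. */
static enum aom_enc_pass pass_mode(int pass, int passes) {
  if (passes == 1) return AOM_RC_ONE_PASS;    /* single-pass encode */
  return (pass == 0) ? AOM_RC_FIRST_PASS      /* statistics gathering */
                     : AOM_RC_LAST_PASS;      /* final encode */
}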

The main function code and comments are as follows:

int main(int argc, const char **argv_) {
  int pass;
  aom_image_t raw;        // the raw input frame
  aom_image_t raw_shift;  // frame buffer used for high bit-depth input
  int allocated_raw_shift = 0;
  int use_16bit_internal = 0;
  int input_shift = 0;
  int frame_avail, got_data;

  struct AvxInputContext input;    // context for the opened input file
  struct AvxEncoderConfig global;  // global encoder configuration
  struct stream_state *streams = NULL;
  char **argv, **argi;
  uint64_t cx_time = 0;
  int stream_cnt = 0;
  int res = 0;
  int profile_updated = 0;

  memset(&input, 0, sizeof(input));
  exec_name = argv_[0];

  /* Setup default input stream settings */
  input.framerate.numerator = 30;
  input.framerate.denominator = 1;
  input.only_i420 = 1;
  input.bit_depth = 0;

  /* First parse the global configuration values, because we want to apply
   * other parameters on top of the default configuration provided by the
   * codec.
   */
  // (the global encoder configuration parsed here is shared by all streams)
  argv = argv_dup(argc - 1, argv_ + 1);
  parse_global_config(&global, &argv);

  if (argc < 2) usage_exit();  // exit if fewer than two arguments were given

  switch (global.color_type) {  // input color format
    case I420: input.fmt = AOM_IMG_FMT_I420; break;
    case I422: input.fmt = AOM_IMG_FMT_I422; break;
    case I444: input.fmt = AOM_IMG_FMT_I444; break;
    case YV12: input.fmt = AOM_IMG_FMT_YV12; break;
  }

  {
    /* Now parse each stream's parameters. Using a local scope here
     * due to the use of 'stream' as loop variable in FOREACH_STREAM
     * loops
     */
    struct stream_state *stream = NULL;

    do {
      stream = new_stream(&global, stream);  // create a new stream
      stream_cnt++;
      if (!streams) streams = stream;  // streams points to the first stream
    } while (parse_stream_params(&global, stream, argv));  // parse this stream's parameters
  }

  /* Check for unrecognized options */
  for (argi = argv; *argi; argi++)
    if (argi[0][0] == '-' && argi[0][1])
      die("Error: Unrecognized option %s\n", *argi);

  FOREACH_STREAM(stream, streams) {  // iterate over all streams
    check_encoder_config(global.disable_warning_prompt, &global,
                         &stream->config.cfg);

    // If large_scale_tile = 1, only support to output to ivf format.
    if (stream->config.cfg.large_scale_tile && !stream->config.write_ivf)
      die("only support ivf output format while large-scale-tile=1\n");
  }

  /* Handle non-option arguments */
  input.filename = argv[0];

  if (!input.filename) {
    fprintf(stderr, "No input file specified!\n");
    usage_exit();
  }

  /* Decide if other chroma subsamplings than 4:2:0 are supported */
  if (global.codec->fourcc == AV1_FOURCC) input.only_i420 = 0;
  
  /************* Start encoding; by default the encoding runs in two passes *************/
  for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
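    // Note: when --pass is given, only that single pass runs here (the loop
    // starts at global.pass - 1 and the "if (global.pass) break;" at the end
    // of the body exits after one iteration).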
    int frames_in = 0, seen_frames = 0;  // frames_in: frames read; seen_frames: frames after the skipped ones
    int64_t estimated_time_left = -1;
    int64_t average_rate = -1;
    int64_t lagged_count = 0;

    // open the input file
    open_input_file(&input, global.csp);

    /* If the input file doesn't specify its w/h (raw files), try to get
     * the data from the first stream's configuration.
     */
    if (!input.width || !input.height) {
      FOREACH_STREAM(stream, streams) {
        if (stream->config.cfg.g_w && stream->config.cfg.g_h) {
          input.width = stream->config.cfg.g_w;
          input.height = stream->config.cfg.g_h;
          break;
        }
      };
    }

    /* Update stream configurations from the input file's parameters */
    if (!input.width || !input.height)
      fatal(
          "Specify stream dimensions with --width (-w) "
          " and --height (-h)");

    /* If input file does not specify bit-depth but input-bit-depth parameter
     * exists, assume that to be the input bit-depth. However, if the
     * input-bit-depth paramter does not exist, assume the input bit-depth
     * to be the same as the codec bit-depth.
     */
    if (!input.bit_depth) {
      FOREACH_STREAM(stream, streams) {
        if (stream->config.cfg.g_input_bit_depth)
          input.bit_depth = stream->config.cfg.g_input_bit_depth;
        else
          input.bit_depth = stream->config.cfg.g_input_bit_depth =
              (int)stream->config.cfg.g_bit_depth;
      }
      if (input.bit_depth > 8) input.fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
    } else {
      FOREACH_STREAM(stream, streams) {
        stream->config.cfg.g_input_bit_depth = input.bit_depth;
      }
    }

    // Iterate over all streams and set up the stream and codec configuration
    FOREACH_STREAM(stream, streams) {
      if (input.fmt != AOM_IMG_FMT_I420 && input.fmt != AOM_IMG_FMT_I42016) {
        /* Automatically upgrade if input is non-4:2:0 but a 4:2:0 profile
           was selected. */
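        /* For reference, the AV1 profiles are: profile 0 = 4:2:0 (and
         * monochrome) at 8/10 bit, profile 1 = 4:4:4 at 8/10 bit, and
         * profile 2 = 4:2:2 at 8/10 bit plus all subsamplings at 12 bit. */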
        switch (stream->config.cfg.g_profile) {
          case 0:
            if (input.bit_depth < 12 && (input.fmt == AOM_IMG_FMT_I444 ||
                                         input.fmt == AOM_IMG_FMT_I44416)) {
              if (!stream->config.cfg.monochrome) {
                stream->config.cfg.g_profile = 1;
                profile_updated = 1;
              }
            } else if (input.bit_depth == 12 || input.fmt == AOM_IMG_FMT_I422 ||
                       input.fmt == AOM_IMG_FMT_I42216) {
              stream->config.cfg.g_profile = 2;
              profile_updated = 1;
            }
            break;
          case 1:
            if (input.bit_depth == 12 || input.fmt == AOM_IMG_FMT_I422 ||
                input.fmt == AOM_IMG_FMT_I42216) {
              stream->config.cfg.g_profile = 2;
              profile_updated = 1;
            } else if (input.bit_depth < 12 &&
                       (input.fmt == AOM_IMG_FMT_I420 ||
                        input.fmt == AOM_IMG_FMT_I42016)) {
              stream->config.cfg.g_profile = 0;
              profile_updated = 1;
            }
            break;
          case 2:
            if (input.bit_depth < 12 && (input.fmt == AOM_IMG_FMT_I444 ||
                                         input.fmt == AOM_IMG_FMT_I44416)) {
              stream->config.cfg.g_profile = 1;
              profile_updated = 1;
            } else if (input.bit_depth < 12 &&
                       (input.fmt == AOM_IMG_FMT_I420 ||
                        input.fmt == AOM_IMG_FMT_I42016)) {
              stream->config.cfg.g_profile = 0;
              profile_updated = 1;
            } else if (input.bit_depth == 12 &&
                       input.file_type == FILE_TYPE_Y4M) {
              // Note that here the input file values for chroma subsampling
              // are used instead of those from the command line.
              AOM_CODEC_CONTROL_TYPECHECKED(&stream->encoder,
                                            AV1E_SET_CHROMA_SUBSAMPLING_X,
                                            input.y4m.dst_c_dec_h >> 1);
              AOM_CODEC_CONTROL_TYPECHECKED(&stream->encoder,
                                            AV1E_SET_CHROMA_SUBSAMPLING_Y,
                                            input.y4m.dst_c_dec_v >> 1);
            } else if (input.bit_depth == 12 &&
                       input.file_type == FILE_TYPE_RAW) {
              AOM_CODEC_CONTROL_TYPECHECKED(&stream->encoder,
                                            AV1E_SET_CHROMA_SUBSAMPLING_X,
                                            stream->chroma_subsampling_x);
              AOM_CODEC_CONTROL_TYPECHECKED(&stream->encoder,
                                            AV1E_SET_CHROMA_SUBSAMPLING_Y,
                                            stream->chroma_subsampling_y);
            }
            break;
          default: break;
        }
      }

      /* Automatically set the codec bit depth to match the input bit depth.
       * Upgrade the profile if required. */
      if (stream->config.cfg.g_input_bit_depth >
          (unsigned int)stream->config.cfg.g_bit_depth) {
        stream->config.cfg.g_bit_depth = stream->config.cfg.g_input_bit_depth;
        if (!global.quiet) {
          fprintf(stderr,
                  "Warning: automatically updating bit depth to %d to "
                  "match input format.\n",
                  stream->config.cfg.g_input_bit_depth);
        }
      }

      if (stream->config.cfg.g_bit_depth > 10) {
        switch (stream->config.cfg.g_profile) {
          case 0:
          case 1:
            stream->config.cfg.g_profile = 2;
            profile_updated = 1;
            break;
          default: break;
        }
      }
      if (stream->config.cfg.g_bit_depth > 8) {
        stream->config.use_16bit_internal = 1;
      }
      if (profile_updated && !global.quiet) {
        fprintf(stderr,
                "Warning: automatically updating to profile %d to "
                "match input format.\n",
                stream->config.cfg.g_profile);
      }
      /* Set limit */
      stream->config.cfg.g_limit = global.limit;
    }
    // set the stream dimensions
    FOREACH_STREAM(stream, streams) {
      set_stream_dimensions(stream, input.width, input.height);
    }
    // validate the stream configuration
    FOREACH_STREAM(stream, streams) { validate_stream_config(stream, &global); }

    /* Ensure that --passes and --pass are consistent. If --pass is set and
     * --passes=2, ensure --fpf was set.
     */
    if (global.pass && global.passes == 2) {
      FOREACH_STREAM(stream, streams) {
        if (!stream->config.stats_fn)
          die("Stream %d: Must specify --fpf when --pass=%d"
              " and --passes=2\n",
              stream->index, global.pass);
      }
    }

#if !CONFIG_WEBM_IO
    FOREACH_STREAM(stream, streams) {
      if (stream->config.write_webm) {
        stream->config.write_webm = 0;
        stream->config.write_ivf = 0;
        warn("aomenc compiled w/o WebM support. Writing OBU stream.");
      }
    }
#endif

    /* Use the frame rate from the file only if none was specified
     * on the command-line.
     */
    if (!global.have_framerate) {
      global.framerate.num = input.framerate.numerator;
      global.framerate.den = input.framerate.denominator;
    }
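    // The encoder timebase is the reciprocal of the frame rate, so numerator
    // and denominator are swapped when copied into g_timebase below.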
    FOREACH_STREAM(stream, streams) {
      stream->config.cfg.g_timebase.den = global.framerate.num;
      stream->config.cfg.g_timebase.num = global.framerate.den;
    }
    /* Show configuration */
    if (global.verbose && pass == 0) {
      FOREACH_STREAM(stream, streams) {
        show_stream_config(stream, &global, &input);
      }
    }

    if (pass == (global.pass ? global.pass - 1 : 0)) {
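      // The raw frame buffer and the per-stream rate histograms are only
      // allocated on the first pass that actually runs.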
      if (input.file_type == FILE_TYPE_Y4M)
        /*The Y4M reader does its own allocation.
          Just initialize this here to avoid problems if we never read any
          frames.*/
        memset(&raw, 0, sizeof(raw));
      else
        aom_img_alloc(&raw, input.fmt, input.width, input.height, 32);

      FOREACH_STREAM(stream, streams) {
        stream->rate_hist =
            init_rate_histogram(&stream->config.cfg, &global.framerate);  // initialize the rate histogram
      }
    }

    FOREACH_STREAM(stream, streams) { setup_pass(stream, &global, pass); }  // set up the current pass
    FOREACH_STREAM(stream, streams) { initialize_encoder(stream, &global); }  // initialize the encoder
    FOREACH_STREAM(stream, streams) {
      open_output_file(stream, &global, &input.pixel_aspect_ratio);  // open the output file (e.g. ivf)
    }

    if (strcmp(global.codec->name, "av1") == 0 ||
        strcmp(global.codec->name, "av1") == 0) {
      // Check to see if at least one stream uses 16 bit internal.
      // Currently assume that the bit_depths for all streams using
      // highbitdepth are the same.
      FOREACH_STREAM(stream, streams) {
        if (stream->config.use_16bit_internal) {
          use_16bit_internal = 1;
        }
        input_shift = (int)stream->config.cfg.g_bit_depth -
                      stream->config.cfg.g_input_bit_depth;
      };
    }

    frame_avail = 1;
    got_data = 0;

    while (frame_avail || got_data) {  // loop: read and encode the video frames
      struct aom_usec_timer timer;

      // if there is no frame limit, or fewer frames than the limit have been read
      if (!global.limit || frames_in < global.limit) {
        frame_avail = read_frame(&input, &raw);  // read a video frame

        if (frame_avail) frames_in++;
        // the first skip_frames frames are not counted as seen
        seen_frames =
            frames_in > global.skip_frames ? frames_in - global.skip_frames : 0;
        // print encoding progress (quiet suppresses it)
        if (!global.quiet) {
          float fps = usec_to_fps(cx_time, seen_frames);
          fprintf(stderr, "\rPass %d/%d ", pass + 1, global.passes);

          if (stream_cnt == 1)
            fprintf(stderr, "frame %4d/%-4d %7" PRId64 "B ", frames_in,
                    streams->frames_out, (int64_t)streams->nbytes);
          else
            fprintf(stderr, "frame %4d ", frames_in);

          fprintf(stderr, "%7" PRId64 " %s %.2f %s ",
                  cx_time > 9999999 ? cx_time / 1000 : cx_time,
                  cx_time > 9999999 ? "ms" : "us", fps >= 1.0 ? fps : fps * 60,
                  fps >= 1.0 ? "fps" : "fpm");
          print_time("ETA", estimated_time_left);
        }

      } else {
        frame_avail = 0;
      }
      // encode the current frame only once the first skip_frames frames have been skipped
      if (frames_in > global.skip_frames) {
        aom_image_t *frame_to_encode;  // the frame to be encoded
        if (input_shift || (use_16bit_internal && input.bit_depth == 8))
        {
          assert(use_16bit_internal);
          // Input bit depth and stream bit depth do not match, so up
          // shift frame to stream bit depth
          if (!allocated_raw_shift) 
          {
            aom_img_alloc(&raw_shift, raw.fmt | AOM_IMG_FMT_HIGHBITDEPTH,
                          input.width, input.height, 32);
            allocated_raw_shift = 1;
          }
          aom_img_upshift(&raw_shift, &raw, input_shift);
          frame_to_encode = &raw_shift;
        } 
        else 
        {
          frame_to_encode = &raw;
        }
        aom_usec_timer_start(&timer);
        if (use_16bit_internal) {  // 16-bit internal bit depth
          assert(frame_to_encode->fmt & AOM_IMG_FMT_HIGHBITDEPTH);
          FOREACH_STREAM(stream, streams) {  // encode the current frame for each stream
            if (stream->config.use_16bit_internal)
              encode_frame(stream, &global,
                           frame_avail ? frame_to_encode : NULL, frames_in);
            else
              assert(0);
          };
        } else {
          assert((frame_to_encode->fmt & AOM_IMG_FMT_HIGHBITDEPTH) == 0);
          FOREACH_STREAM(stream, streams) {  // encode the current frame for each stream
            encode_frame(stream, &global, frame_avail ? frame_to_encode : NULL,
                         frames_in);
          }
        }
        aom_usec_timer_mark(&timer);
        cx_time += aom_usec_timer_elapsed(&timer);

        // update the quantizer histogram
        FOREACH_STREAM(stream, streams) { update_quantizer_histogram(stream); }

        got_data = 0;
        FOREACH_STREAM(stream, streams) {
          get_cx_data(stream, &global, &got_data);
        }

        if (!got_data && input.length && streams != NULL &&
            !streams->frames_out) {
          lagged_count = global.limit ? seen_frames : ftello(input.file);
        } else if (input.length) {
          int64_t remaining;
          int64_t rate;

          if (global.limit) {
            const int64_t frame_in_lagged = (seen_frames - lagged_count) * 1000;

            rate = cx_time ? frame_in_lagged * (int64_t)1000000 / cx_time : 0;
            remaining = 1000 * (global.limit - global.skip_frames -
                                seen_frames + lagged_count);
          } else {
            const int64_t input_pos = ftello(input.file);
            const int64_t input_pos_lagged = input_pos - lagged_count;
            const int64_t input_limit = input.length;

            rate = cx_time ? input_pos_lagged * (int64_t)1000000 / cx_time : 0;
            remaining = input_limit - input_pos + lagged_count;
          }

          average_rate = (average_rate <= 0) ? rate : (average_rate * 7 + rate) / 8;
          estimated_time_left = average_rate ? remaining / average_rate : -1;
        }

        if (got_data && global.test_decode != TEST_DECODE_OFF) {
          FOREACH_STREAM(stream, streams) {
            test_decode(stream, global.test_decode);
          }
        }
      }// frames_in > global.skip_frames

      fflush(stdout);
      if (!global.quiet) fprintf(stderr, "\033[K");
    }  // end of the frame read/encode loop

    if (stream_cnt > 1) fprintf(stderr, "\n");

    if (!global.quiet) {
      FOREACH_STREAM(stream, streams) {
        const int64_t bpf =
            seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0;
        const int64_t bps = bpf * global.framerate.num / global.framerate.den;
        fprintf(stderr,
                "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64
                "b/f %7" PRId64
                "b/s"
                " %7" PRId64 " %s (%.2f fps)\033[K\n",
                pass + 1, global.passes, frames_in, stream->frames_out,
                (int64_t)stream->nbytes, bpf, bps,
                stream->cx_time > 9999999 ? stream->cx_time / 1000
                                          : stream->cx_time,
                stream->cx_time > 9999999 ? "ms" : "us",
                usec_to_fps(stream->cx_time, seen_frames));
      }
    }

    if (global.show_psnr) 
    {
      if (global.codec->fourcc == AV1_FOURCC) 
      {
        FOREACH_STREAM(stream, streams) 
        {
          int64_t bps = 0;
          if (stream->psnr_count && seen_frames && global.framerate.den)
          {
            bps = (int64_t)stream->nbytes * 8 * (int64_t)global.framerate.num / global.framerate.den / seen_frames;
          }
          // print PSNR
          show_psnr(stream, (1 << stream->config.cfg.g_input_bit_depth) - 1, bps);
          // signature: static void show_psnr(struct stream_state *stream, double peak, int64_t bps)
        }
      } 
      else 
      {
        FOREACH_STREAM(stream, streams) { show_psnr(stream, 255.0, 0); }
      }
    }

    // destroy the encoders
    FOREACH_STREAM(stream, streams) { aom_codec_destroy(&stream->encoder); }

    if (global.test_decode != TEST_DECODE_OFF) {
      FOREACH_STREAM(stream, streams) { aom_codec_destroy(&stream->decoder); }
    }

    close_input_file(&input);

    if (global.test_decode == TEST_DECODE_FATAL) {
      FOREACH_STREAM(stream, streams) { res |= stream->mismatch_seen; }
    }
    FOREACH_STREAM(stream, streams) {
      close_output_file(stream, global.codec->fourcc);  // close the output file
    }

    FOREACH_STREAM(stream, streams) {
      stats_close(&stream->stats, global.passes - 1);
    }

    if (global.pass) break;
  } //Pass End

  if (global.show_q_hist_buckets) {  // show the quantizer histogram
    FOREACH_STREAM(stream, streams) {
      show_q_histogram(stream->counts, global.show_q_hist_buckets);
    }
  }

  if (global.show_rate_hist_buckets) {  // show the rate histogram
    FOREACH_STREAM(stream, streams) {
      show_rate_histogram(stream->rate_hist, &stream->config.cfg,
                          global.show_rate_hist_buckets);
    }
  }
  FOREACH_STREAM(stream, streams) { destroy_rate_histogram(stream->rate_hist); }

#if CONFIG_INTERNAL_STATS
  /* TODO(jkoleszar): This doesn't belong in this executable. Do it for now,
   * to match some existing utilities.
   */
  if (!(global.pass == 1 && global.passes == 2)) {
    FOREACH_STREAM(stream, streams) {
      FILE *f = fopen("opsnr.stt", "a");
      if (stream->mismatch_seen) {
        fprintf(f, "First mismatch occurred in frame %d\n",
                stream->mismatch_seen);
      } else {
        fprintf(f, "No mismatch detected in recon buffers\n");
      }
      fclose(f);
    }
  }
#endif

  if (allocated_raw_shift) aom_img_free(&raw_shift);
  aom_img_free(&raw);
  free(argv);
  free(streams);
  return res ? EXIT_FAILURE : EXIT_SUCCESS;
}

 
