AV1 code learning: encoder_encode function

The encoder_encode function is called from the aom_codec_encode function.

The main flow of this function is as follows:

  1. Validate the input frame and compute the size of the output buffer, ctx->cx_data_sz (the encoded data is stored in ctx->cx_data).
  2. Apply the previously parsed configuration to the encoder (the av1_change_config function). During encoding, much of the detailed encoder state can be reached through the AV1_COMP structure at ctx->cpi.
  3. Apply the per-frame encoding flags (av1_apply_encoding_flags). (I do not yet fully understand the meaning of this part.)
  4. Initialize a series of structures and buffers, including the lookahead structure and the reference frame list.
  5. Call the av1_get_compressed_data function to encode and obtain the compressed data. A return value of -1 means that no frame was encoded on this call (because the encoder has a lag_in_frames setting, 19 by default, frame 0 is not encoded until frame 19 has been received).
  6. If encoded data was produced (frame_size is non-zero), write the frame's header information into the header of the bitstream.

Note:

By default, the AV1 encoder runs two encoding passes. The first pass only gathers encoding statistics; no actual encoding is performed, that is, nothing is written to the bitstream. The second pass is the real encoding.

The code and comments are as follows:

/*
 * Submits one raw image to the encoder (when img is non-NULL) and collects
 * any compressed data that is ready, queuing a frame packet on ctx->pkt_list
 * once a visible frame has been produced.
 *
 * ctx       - encoder private context; ctx->cpi must be non-NULL.
 * img       - raw input frame, or NULL. `!img` is forwarded to
 *             av1_get_compressed_data (presumably as a flush request --
 *             confirm against that function's declaration).
 * pts       - presentation timestamp in timebase units. The pts of the first
 *             call is stored as ctx->pts_offset and subtracted from every pts.
 * duration  - frame duration in timebase units.
 * enc_flags - per-frame flags forwarded to av1_apply_encoding_flags.
 *
 * Returns AOM_CODEC_OK on success, AOM_CODEC_INVALID_PARAM when the compressor
 * instances are missing, AOM_CODEC_MEM_ERROR when the output buffer cannot be
 * allocated, or the value produced by validate_img / image2yuvconfig /
 * update_error_state on failure.
 *
 * Errors raised through aom_internal_error() come back via longjmp into the
 * setjmp handlers below. Both error contexts (cpi and cpi_lap) are disarmed on
 * every exit path that follows the setjmp calls.
 */
static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
                                      const aom_image_t *img,
                                      aom_codec_pts_t pts,
                                      unsigned long duration,
                                      aom_enc_frame_flags_t enc_flags) {
  const size_t kMinCompressedSize = 8192;
  volatile aom_codec_err_t res = AOM_CODEC_OK;
  AV1_COMP *const cpi = ctx->cpi;  // Holds the encoder configuration/state.
  const aom_rational64_t *const timestamp_ratio = &ctx->timestamp_ratio;
  volatile aom_codec_pts_t ptsvol = pts;
  // LAP context
  AV1_COMP *cpi_lap = ctx->cpi_lap;

  if (cpi == NULL) return AOM_CODEC_INVALID_PARAM;

  if (cpi->lap_enabled && cpi_lap == NULL && cpi->oxcf.pass == 0)
    return AOM_CODEC_INVALID_PARAM;

  if (img != NULL) {
    res = validate_img(ctx, img);
    // TODO(jzern) the checks related to cpi's validity should be treated as a
    // failure condition, encoder setup is done fully in init() currently.
    if (res == AOM_CODEC_OK) {
      // Output buffer size derived from the 32-aligned frame dimensions.
      size_t data_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
                       ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) * get_image_bps(img);
      // Never allocate less than the minimum compressed size.
      if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize;
      if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
        ctx->cx_data_sz = data_sz;
        free(ctx->cx_data);
        ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz);
        if (ctx->cx_data == NULL) {
          // Keep the recorded size consistent with the (now absent) buffer so
          // the "NULL buffer implies zero size" invariant asserted further
          // down still holds on a later call.
          ctx->cx_data_sz = 0;
          return AOM_CODEC_MEM_ERROR;
        }
      }
    }
  }
  if (ctx->oxcf.mode != GOOD && ctx->oxcf.mode != REALTIME) {
    ctx->oxcf.mode = GOOD;
    av1_change_config(ctx->cpi, &ctx->oxcf);  // Apply the encoder config.
  }

  if (!ctx->pts_offset_initialized) {
    ctx->pts_offset = ptsvol;
    ctx->pts_offset_initialized = 1;
  }
  ptsvol -= ctx->pts_offset;

  aom_codec_pkt_list_init(&ctx->pkt_list);

  volatile aom_enc_frame_flags_t flags = enc_flags;

  // The jmp_buf is valid only for the duration of the function that calls
  // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
  // before it returns. This applies to both cpi and cpi_lap: whichever
  // context raised the error, neither may stay armed after we return.
  if (setjmp(cpi->common.error.jmp)) {
    cpi->common.error.setjmp = 0;
    if (cpi_lap != NULL) cpi_lap->common.error.setjmp = 0;
    res = update_error_state(ctx, &cpi->common.error);
    aom_clear_system_state();
    return res;
  }
  cpi->common.error.setjmp = 1;
  if (cpi_lap != NULL) {
    if (setjmp(cpi_lap->common.error.jmp)) {
      cpi_lap->common.error.setjmp = 0;
      cpi->common.error.setjmp = 0;
      res = update_error_state(ctx, &cpi_lap->common.error);
      aom_clear_system_state();
      return res;
    }
    cpi_lap->common.error.setjmp = 1;
  }

  // Note(yunqing): While applying encoding flags, always start from enabling
  // all, and then modifying according to the flags. Previous frame's flags are
  // overwritten.
  av1_apply_encoding_flags(cpi, flags);
  if (cpi_lap != NULL) {
    av1_apply_encoding_flags(cpi_lap, flags);
  }

  // Handle fixed keyframe intervals
  if (is_stat_generation_stage(cpi)) {
    if (ctx->cfg.kf_mode == AOM_KF_AUTO &&
        ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) {
      if (cpi->common.spatial_layer_id == 0 &&
          ++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) {
        flags |= AOM_EFLAG_FORCE_KF;
        ctx->fixed_kf_cntr = 1;
      }
    }
  }

  if (res == AOM_CODEC_OK) {
    int64_t dst_time_stamp = timebase_units_to_ticks(timestamp_ratio, ptsvol);
    int64_t dst_end_time_stamp =
        timebase_units_to_ticks(timestamp_ratio, ptsvol + duration);

    // Set up internal flags
    if (ctx->base.init_flags & AOM_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;

    if (img != NULL) {
      YV12_BUFFER_CONFIG sd;
      int use_highbitdepth, subsampling_x, subsampling_y;
      // Derive the YUV buffer description from the input image.
      res = image2yuvconfig(img, &sd);
      use_highbitdepth = (sd.flags & YV12_FLAG_HIGHBITDEPTH) != 0;
      subsampling_x = sd.subsampling_x;
      subsampling_y = sd.subsampling_y;

      if (!cpi->lookahead) {
        // Number of frames of lag before encoding starts.
        int lag_in_frames = cpi_lap != NULL ? cpi_lap->oxcf.lag_in_frames
                                            : cpi->oxcf.lag_in_frames;
        // Initialize the lookahead stage. The lookahead stage is a queue of
        // frame buffers; while buffers are queued, some analysis can be run
        // on them.
        cpi->lookahead = av1_lookahead_init(
            cpi->oxcf.width, cpi->oxcf.height, subsampling_x, subsampling_y,
            use_highbitdepth, lag_in_frames, cpi->oxcf.border_in_pixels,
            cpi->common.features.byte_alignment, ctx->num_lap_buffers);
      }
      if (!cpi->lookahead)
        aom_internal_error(&cpi->common.error, AOM_CODEC_MEM_ERROR,
                           "Failed to allocate lag buffers");

      av1_check_initial_width(cpi, use_highbitdepth, subsampling_x,
                              subsampling_y);
      if (cpi_lap != NULL) {
        // The LAP compressor shares the main compressor's lookahead queue.
        cpi_lap->lookahead = cpi->lookahead;
        av1_check_initial_width(cpi_lap, use_highbitdepth, subsampling_x,
                                subsampling_y);
      }

      // Store the original flags in to the frame buffer. Will extract the
      // key frame flag when we actually encode this frame.
      if (av1_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
                                dst_time_stamp, dst_end_time_stamp)) {
        res = update_error_state(ctx, &cpi->common.error);
      }
      ctx->next_frame_flags = 0;
    }  // img != NULL

    unsigned char *cx_data = ctx->cx_data;
    size_t cx_data_sz = ctx->cx_data_sz;

    assert(!(cx_data == NULL && cx_data_sz != 0));

    /* Any pending invisible frames? */
    if (ctx->pending_cx_data) {
      // Move the pending data to the start of the output buffer and continue
      // writing right after it.
      memmove(cx_data, ctx->pending_cx_data, ctx->pending_cx_data_sz);
      ctx->pending_cx_data = cx_data;
      cx_data += ctx->pending_cx_data_sz;
      cx_data_sz -= ctx->pending_cx_data_sz;

      /* TODO: this is a minimal check, the underlying codec doesn't respect
       * the buffer size anyway.
       */
      if (cx_data_sz < ctx->cx_data_sz / 2) {
        aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
                           "Compressed data buffer too small");
      }
    }

    size_t frame_size = 0;
    unsigned int lib_flags = 0;
    int is_frame_visible = 0;
    int index_size = 0;
    int has_fwd_keyframe = 0;

    // Call for LAP stage
    if (cpi_lap != NULL) {
      int status;
      // Work on copies so the LAP call cannot disturb the timestamps used by
      // the main encode below.
      aom_rational64_t timestamp_ratio_la = *timestamp_ratio;
      int64_t dst_time_stamp_la = dst_time_stamp;
      int64_t dst_end_time_stamp_la = dst_end_time_stamp;
      // No destination buffer is passed (NULL) for the LAP call.
      status = av1_get_compressed_data(
          cpi_lap, &lib_flags, &frame_size, NULL, &dst_time_stamp_la,
          &dst_end_time_stamp_la, !img, &timestamp_ratio_la);
      if (status != -1) {
        if (status != AOM_CODEC_OK) {
          aom_internal_error(&cpi_lap->common.error, AOM_CODEC_ERROR, NULL);
        }
        cpi_lap->seq_params_locked = 1;
      }
      // Discard the LAP call's outputs before the real encode.
      lib_flags = 0;
      frame_size = 0;
    }

    // invisible frames get packed with the next visible frame
    while (cx_data_sz - index_size >= ctx->cx_data_sz / 2 &&
           !is_frame_visible) {
      // Fetch the next chunk of compressed data into cx_data.
      const int status = av1_get_compressed_data(
          cpi, &lib_flags, &frame_size, cx_data, &dst_time_stamp,
          &dst_end_time_stamp, !img, timestamp_ratio);
      // -1: nothing was produced on this call; stop polling.
      if (status == -1) break;
      if (status != AOM_CODEC_OK) {
        aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
      }

      // Lock the sequence parameters (original note: once locked,
      // av1_change_config can no longer change them).
      cpi->seq_params_locked = 1;
      if (frame_size) {
        if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;

        // A temporal delimiter is written only for spatial layer 0 and only
        // in front of the first frame of the pending packet.
        const int write_temporal_delimiter =
            !cpi->common.spatial_layer_id && !ctx->pending_frame_count;

        if (write_temporal_delimiter) {
          uint32_t obu_header_size = 1;
          const uint32_t obu_payload_size = 0;
          const size_t length_field_size =
              aom_uleb_size_in_bytes(obu_payload_size);

          if (ctx->pending_cx_data) {
            // Shift the frame data to make room for the delimiter's header
            // byte plus its length field.
            const size_t move_offset = length_field_size + 1;
            memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
                    frame_size);
          }
          const uint32_t obu_header_offset = 0;
          obu_header_size = av1_write_obu_header(
              &cpi->level_params, OBU_TEMPORAL_DELIMITER, 0,
              (uint8_t *)(ctx->pending_cx_data + obu_header_offset));

          // OBUs are preceded/succeeded by an unsigned leb128 coded integer.
          if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size,
                                      ctx->pending_cx_data) != AOM_CODEC_OK) {
            aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
          }

          frame_size += obu_header_size + obu_payload_size + length_field_size;
        }

        if (ctx->oxcf.save_as_annexb) {
          size_t curr_frame_size = frame_size;
          if (av1_convert_sect5obus_to_annexb(cx_data, &curr_frame_size) !=
              AOM_CODEC_OK) {
            aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
          }
          frame_size = curr_frame_size;

          // B_PRIME (add frame size)
          const size_t length_field_size = aom_uleb_size_in_bytes(frame_size);
          if (ctx->pending_cx_data) {
            const size_t move_offset = length_field_size;
            memmove(cx_data + move_offset, cx_data, frame_size);
          }
          if (av1_write_uleb_obu_size(0, (uint32_t)frame_size, cx_data) !=
              AOM_CODEC_OK) {
            aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
          }
          frame_size += length_field_size;
        }

        ctx->pending_frame_sizes[ctx->pending_frame_count++] = frame_size;
        ctx->pending_cx_data_sz += frame_size;

        cx_data += frame_size;
        cx_data_sz -= frame_size;

        index_size = MAG_SIZE * (ctx->pending_frame_count - 1) + 2;

        is_frame_visible = cpi->common.show_frame;

        has_fwd_keyframe |= (!is_frame_visible &&
                             cpi->common.current_frame.frame_type == KEY_FRAME);
      }
    }

    if (is_frame_visible) {
      // Add the frame packet to the list of returned packets.
      aom_codec_cx_pkt_t pkt;

      if (ctx->oxcf.save_as_annexb) {
        //  B_PRIME (add TU size)
        size_t tu_size = ctx->pending_cx_data_sz;
        const size_t length_field_size = aom_uleb_size_in_bytes(tu_size);
        if (ctx->pending_cx_data) {
          const size_t move_offset = length_field_size;
          memmove(ctx->pending_cx_data + move_offset, ctx->pending_cx_data,
                  tu_size);
        }
        if (av1_write_uleb_obu_size(0, (uint32_t)tu_size,
                                    ctx->pending_cx_data) != AOM_CODEC_OK) {
          aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
        }
        ctx->pending_cx_data_sz += length_field_size;
      }

      pkt.kind = AOM_CODEC_CX_FRAME_PKT;

      pkt.data.frame.buf = ctx->pending_cx_data;
      pkt.data.frame.sz = ctx->pending_cx_data_sz;
      pkt.data.frame.partition_id = -1;
      pkt.data.frame.vis_frame_size = frame_size;

      // Convert back to timebase units and restore the caller's pts origin.
      pkt.data.frame.pts =
          ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
          ctx->pts_offset;
      pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
      if (has_fwd_keyframe) {
        // If one of the invisible frames in the packet is a keyframe, set
        // the delayed random access point flag.
        pkt.data.frame.flags |= AOM_FRAME_IS_DELAYED_RANDOM_ACCESS_POINT;
      }
      pkt.data.frame.duration = (uint32_t)ticks_to_timebase_units(
          timestamp_ratio, dst_end_time_stamp - dst_time_stamp);

      aom_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);

      // The pending data has been handed over in the packet; start fresh.
      ctx->pending_cx_data = NULL;
      ctx->pending_cx_data_sz = 0;
      ctx->pending_frame_count = 0;
    }
  }  // res == AOM_CODEC_OK

  // Disarm both error contexts: their jmp_bufs become invalid on return.
  cpi->common.error.setjmp = 0;
  if (cpi_lap != NULL) cpi_lap->common.error.setjmp = 0;
  return res;
}

 

Guess you like

Origin blog.csdn.net/BigDream123/article/details/109497488