AV1 code learning: functions encode_frame and aom_codec_encode

1.encode_frame function

In the main function of the encoding end aomenc.c, after entering the encoding process loop, read each frame of the video in a loop, and then encode each frame through the encode_frame function.

The encode_frame function is mainly to perform some work of scaling the current frame, start the timer, and then call the aom_codec_encode function to encode.

static void encode_frame(struct stream_state *stream,
                         struct AvxEncoderConfig *global, struct aom_image *img,
                         unsigned int frames_in) {
  aom_codec_pts_t frame_start, next_frame_start; //起始时间戳
  struct aom_codec_enc_cfg *cfg = &stream->config.cfg;
  struct aom_usec_timer timer;

  frame_start =
      (cfg->g_timebase.den * (int64_t)(frames_in - 1) * global->framerate.den) /
      cfg->g_timebase.num / global->framerate.num;
  next_frame_start =
      (cfg->g_timebase.den * (int64_t)(frames_in)*global->framerate.den) /
      cfg->g_timebase.num / global->framerate.num;

  /* Scale if necessary */
  if (img) {
    if ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) &&
        (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) {
      if (img->fmt != AOM_IMG_FMT_I42016) {
        fprintf(stderr, "%s can only scale 4:2:0 inputs\n", exec_name);
        exit(EXIT_FAILURE);
      }
#if CONFIG_LIBYUV
      if (!stream->img) {
        stream->img =
            aom_img_alloc(NULL, AOM_IMG_FMT_I42016, cfg->g_w, cfg->g_h, 16);
      }
      I420Scale_16(
          (uint16_t *)img->planes[AOM_PLANE_Y], img->stride[AOM_PLANE_Y] / 2,
          (uint16_t *)img->planes[AOM_PLANE_U], img->stride[AOM_PLANE_U] / 2,
          (uint16_t *)img->planes[AOM_PLANE_V], img->stride[AOM_PLANE_V] / 2,
          img->d_w, img->d_h, (uint16_t *)stream->img->planes[AOM_PLANE_Y],
          stream->img->stride[AOM_PLANE_Y] / 2,
          (uint16_t *)stream->img->planes[AOM_PLANE_U],
          stream->img->stride[AOM_PLANE_U] / 2,
          (uint16_t *)stream->img->planes[AOM_PLANE_V],
          stream->img->stride[AOM_PLANE_V] / 2, stream->img->d_w,
          stream->img->d_h, kFilterBox);
      img = stream->img;
#else
      stream->encoder.err = 1;
      ctx_exit_on_error(&stream->encoder,
                        "Stream %d: Failed to encode frame.\n"
                        "libyuv is required for scaling but is currently "
                        "disabled.\n"
                        "Be sure to specify -DCONFIG_LIBYUV=1 when running "
                        "cmake.\n",
                        stream->index);
#endif
    }
  }
  if (img && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) {
    if (img->fmt != AOM_IMG_FMT_I420 && img->fmt != AOM_IMG_FMT_YV12) {
      fprintf(stderr, "%s can only scale 4:2:0 8bpp inputs\n", exec_name);
      exit(EXIT_FAILURE);
    }
#if CONFIG_LIBYUV
    if (!stream->img)
      stream->img =
          aom_img_alloc(NULL, AOM_IMG_FMT_I420, cfg->g_w, cfg->g_h, 16);
    I420Scale(
        img->planes[AOM_PLANE_Y], img->stride[AOM_PLANE_Y],
        img->planes[AOM_PLANE_U], img->stride[AOM_PLANE_U],
        img->planes[AOM_PLANE_V], img->stride[AOM_PLANE_V], img->d_w, img->d_h,
        stream->img->planes[AOM_PLANE_Y], stream->img->stride[AOM_PLANE_Y],
        stream->img->planes[AOM_PLANE_U], stream->img->stride[AOM_PLANE_U],
        stream->img->planes[AOM_PLANE_V], stream->img->stride[AOM_PLANE_V],
        stream->img->d_w, stream->img->d_h, kFilterBox);
    img = stream->img;
#else
    stream->encoder.err = 1;
    ctx_exit_on_error(&stream->encoder,
                      "Stream %d: Failed to encode frame.\n"
                      "Scaling disabled in this configuration. \n"
                      "To enable, configure with --enable-libyuv\n",
                      stream->index);
#endif
  }

  aom_usec_timer_start(&timer);
  aom_codec_encode(&stream->encoder, img, frame_start,
                   (uint32_t)(next_frame_start - frame_start), 0);
  aom_usec_timer_mark(&timer);
  stream->cx_time += aom_usec_timer_elapsed(&timer);
  ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame",
                    stream->index);
}

2.aom_codec_encode function

The main function of this function is to encode a frame. Encode the video frame at a given "presentation time." The presentation time stamp (PTS) must be strictly increased.

aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
                                 aom_codec_pts_t pts, unsigned long duration,
                                 aom_enc_frame_flags_t flags);

aom_codec_ctx_t codec context structure, the actual interface between user code and codec
All codecs must fully support this context structure. Generally speaking, these data should be considered as private data of the codec algorithm and not manipulated or checked by the calling application. The application can reference the "name" member to obtain a printable description of the algorithm.

It stores the name of the codec, the pointer to the aom codec that initialized it, the initialization flag, the configuration of the encoder or decoder, and the pointer to internal data.

typedef struct aom_codec_ctx {
  const char *name;             /**< Printable interface name 可打印接口名称 */
  aom_codec_iface_t *iface;     /**< Interface pointers 接口指针*/
  aom_codec_err_t err;          /**< Last returned error 上次返回的错误*/
  const char *err_detail;       /**< Detailed info, if available 详细信息（如果有） */
  aom_codec_flags_t init_flags; /**< Flags passed at init time 初始化时传递的标志*/
  union {
    /**< Decoder Configuration Pointer 解码器配置指针 */
    const struct aom_codec_dec_cfg *dec;
    /**< Encoder Configuration Pointer */
    const struct aom_codec_enc_cfg *enc;
    const void *raw;
  } config;               /**< Configuration pointer aliasing union */
  aom_codec_priv_t *priv; /**< Algorithm private storage 算法专用存储 */
} aom_codec_ctx_t;

parameter:

ctx points to the ctx pointer of this instance context
img The img image data to be encoded, NULL means refreshing the buffer.
pts demonstrates the timestamp, expressed in time base units.
Duration displays the duration of the frame, in units of time base.
flags are used to encode the flags of this frame.

return value:

AOM_CODEC_OK: The operation has been completed without errors.
AOM_CODEC_INCAPABLE: The algorithm has no required functions.
AOM_CODEC_INVALID_PARAM: The parameters provided by the application are invalid, the image format is not supported, etc.

When the last frame has been passed to the encoder, you should continue to call this function and set the img parameter to NULL. This will signal the end of the stream to the encoder and allow it to encode any reserved buffers. When aom_codec_encode() is called and aom_codec_get_cx_data() does not return any data, encoding is complete.

This function mainly calls encoder_encode for encoding.

aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
                                 aom_codec_pts_t pts, unsigned long duration,
                                 aom_enc_frame_flags_t flags) {
  aom_codec_err_t res = AOM_CODEC_OK;

  if (!ctx || (img && !duration))
    res = AOM_CODEC_INVALID_PARAM;
  else if (!ctx->iface || !ctx->priv)
    res = AOM_CODEC_ERROR;
  else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
    res = AOM_CODEC_INCAPABLE;
  else {
    /* Execute in a normalized floating point environment, if the platform
     * requires it.
     */
    FLOATING_POINT_INIT
    res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration, flags);
    FLOATING_POINT_RESTORE
  }

The function by get_alg_priv () function to get aom_codec_ctx_t structural body aom_codec_priv_t pointer

static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
  return (aom_codec_alg_priv_t *)ctx->priv;
}

aom_codec_priv_t

Codec private data structure.
Contains data specific to the codec implementation. This structure is not transparent to the application.