Apprentissage du code AV1: fonctions encode_frame et aom_codec_encode

1. La fonction encode_frame

Dans la fonction main d'aomenc.c, côté encodeur, une fois entré dans la boucle d'encodage, chaque image de la vidéo est lue tour à tour, puis encodée via la fonction encode_frame.

La fonction encode_frame met d'abord l'image courante à l'échelle si ses dimensions diffèrent de celles configurées, démarre ensuite un chronomètre, puis appelle la fonction aom_codec_encode pour effectuer l'encodage.

/*
 * Encodes one input image on the given stream.
 *
 * The presentation timestamp and duration handed to aom_codec_encode() are
 * derived from frames_in, the stream timebase (cfg->g_timebase) and the
 * global frame rate. When img is non-NULL and its displayed size differs from
 * the configured size (cfg->g_w x cfg->g_h), it is first scaled into
 * stream->img, which is allocated on first use. Scaling requires
 * CONFIG_LIBYUV and only accepts 4:2:0 input. The time spent inside
 * aom_codec_encode() is added to stream->cx_time.
 *
 * stream     Stream whose encoder, configuration and scaling buffer are used.
 * global     Global encoder settings; only framerate is read here.
 * img        Image to encode; may be NULL, in which case no scaling is done
 *            and NULL is passed on to aom_codec_encode().
 * frames_in  Frame counter; the timestamp of this frame is computed from
 *            frames_in - 1 and the next frame's from frames_in.
 *
 * Unsupported input formats and a failed scaling-buffer allocation terminate
 * the process with EXIT_FAILURE; encoder errors are reported through
 * ctx_exit_on_error().
 */
static void encode_frame(struct stream_state *stream,
                         struct AvxEncoderConfig *global, struct aom_image *img,
                         unsigned int frames_in) {
  aom_codec_pts_t frame_start, next_frame_start; /* start timestamps */
  struct aom_codec_enc_cfg *cfg = &stream->config.cfg;
  struct aom_usec_timer timer;

  /* Convert frame indices to timebase units; the int64_t cast widens the
   * product before the divisions. */
  frame_start =
      (cfg->g_timebase.den * (int64_t)(frames_in - 1) * global->framerate.den) /
      cfg->g_timebase.num / global->framerate.num;
  next_frame_start =
      (cfg->g_timebase.den * (int64_t)(frames_in)*global->framerate.den) /
      cfg->g_timebase.num / global->framerate.num;

  /* Scale if necessary: high bit depth input first. */
  if (img) {
    if ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) &&
        (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) {
      if (img->fmt != AOM_IMG_FMT_I42016) {
        fprintf(stderr, "%s can only scale 4:2:0 inputs\n", exec_name);
        exit(EXIT_FAILURE);
      }
#if CONFIG_LIBYUV
      if (!stream->img) {
        stream->img =
            aom_img_alloc(NULL, AOM_IMG_FMT_I42016, cfg->g_w, cfg->g_h, 16);
        /* Without this check a failed allocation would be dereferenced by
         * the scale call below. */
        if (!stream->img) {
          fprintf(stderr, "%s: failed to allocate scaling buffer\n",
                  exec_name);
          exit(EXIT_FAILURE);
        }
      }
      /* Strides are halved before being passed alongside the uint16_t plane
       * pointers. */
      I420Scale_16(
          (uint16_t *)img->planes[AOM_PLANE_Y], img->stride[AOM_PLANE_Y] / 2,
          (uint16_t *)img->planes[AOM_PLANE_U], img->stride[AOM_PLANE_U] / 2,
          (uint16_t *)img->planes[AOM_PLANE_V], img->stride[AOM_PLANE_V] / 2,
          img->d_w, img->d_h, (uint16_t *)stream->img->planes[AOM_PLANE_Y],
          stream->img->stride[AOM_PLANE_Y] / 2,
          (uint16_t *)stream->img->planes[AOM_PLANE_U],
          stream->img->stride[AOM_PLANE_U] / 2,
          (uint16_t *)stream->img->planes[AOM_PLANE_V],
          stream->img->stride[AOM_PLANE_V] / 2, stream->img->d_w,
          stream->img->d_h, kFilterBox);
      img = stream->img;
#else
      stream->encoder.err = 1;
      ctx_exit_on_error(&stream->encoder,
                        "Stream %d: Failed to encode frame.\n"
                        "libyuv is required for scaling but is currently "
                        "disabled.\n"
                        "Be sure to specify -DCONFIG_LIBYUV=1 when running "
                        "cmake.\n",
                        stream->index);
#endif
    }
  }
  /* 8-bit path. After a successful high bit depth scale above, img already
   * has the configured size, so this branch is not taken. */
  if (img && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) {
    if (img->fmt != AOM_IMG_FMT_I420 && img->fmt != AOM_IMG_FMT_YV12) {
      fprintf(stderr, "%s can only scale 4:2:0 8bpp inputs\n", exec_name);
      exit(EXIT_FAILURE);
    }
#if CONFIG_LIBYUV
    if (!stream->img) {
      stream->img =
          aom_img_alloc(NULL, AOM_IMG_FMT_I420, cfg->g_w, cfg->g_h, 16);
      /* Same guard as in the high bit depth path. */
      if (!stream->img) {
        fprintf(stderr, "%s: failed to allocate scaling buffer\n", exec_name);
        exit(EXIT_FAILURE);
      }
    }
    I420Scale(
        img->planes[AOM_PLANE_Y], img->stride[AOM_PLANE_Y],
        img->planes[AOM_PLANE_U], img->stride[AOM_PLANE_U],
        img->planes[AOM_PLANE_V], img->stride[AOM_PLANE_V], img->d_w, img->d_h,
        stream->img->planes[AOM_PLANE_Y], stream->img->stride[AOM_PLANE_Y],
        stream->img->planes[AOM_PLANE_U], stream->img->stride[AOM_PLANE_U],
        stream->img->planes[AOM_PLANE_V], stream->img->stride[AOM_PLANE_V],
        stream->img->d_w, stream->img->d_h, kFilterBox);
    img = stream->img;
#else
    stream->encoder.err = 1;
    ctx_exit_on_error(&stream->encoder,
                      "Stream %d: Failed to encode frame.\n"
                      "Scaling disabled in this configuration. \n"
                      "To enable, configure with --enable-libyuv\n",
                      stream->index);
#endif
  }

  /* Time only the encode call itself. */
  aom_usec_timer_start(&timer);
  aom_codec_encode(&stream->encoder, img, frame_start,
                   (uint32_t)(next_frame_start - frame_start), 0);
  aom_usec_timer_mark(&timer);
  stream->cx_time += aom_usec_timer_elapsed(&timer);
  ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame",
                    stream->index);
}

2. La fonction aom_codec_encode

Le rôle principal de cette fonction est d'encoder une image. Elle encode l'image vidéo à un « instant de présentation » donné. L'horodatage de présentation (PTS) doit être strictement croissant.

/**
 * Encode one frame at the given presentation time.
 *
 * \param ctx       Codec context of this encoder instance.
 * \param img       Image to encode, or NULL to signal end of stream so the
 *                  encoder can emit what it still holds in its buffers.
 * \param pts       Presentation timestamp, in timebase units; must be
 *                  strictly increasing from one call to the next.
 * \param duration  Display duration of the frame, in timebase units; must be
 *                  non-zero when img is non-NULL.
 * \param flags     Flags to use when encoding this frame.
 *
 * \return AOM_CODEC_OK on success, AOM_CODEC_INVALID_PARAM for invalid
 *         arguments, AOM_CODEC_ERROR when the context has no interface or
 *         private data, AOM_CODEC_INCAPABLE when the interface lacks the
 *         encoder capability.
 */
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
                                 aom_codec_pts_t pts, unsigned long duration,
                                 aom_enc_frame_flags_t flags);

aom_codec_ctx_t est la structure de contexte du codec : c'est l'interface effective entre le code utilisateur et le codec.
Tous les codecs doivent prendre pleinement en charge cette structure de contexte. De manière générale, ses données doivent être considérées comme privées à l'algorithme du codec et ne doivent être ni manipulées ni inspectées par l'application appelante. L'application peut toutefois consulter le membre « name » pour obtenir une description imprimable de l'algorithme.

Elle stocke le nom du codec, un pointeur vers l'interface du codec aom qui l'a initialisée, les drapeaux d'initialisation, la configuration de l'encodeur ou du décodeur et un pointeur vers les données internes.

/**
 * Codec context: the interface between user code and the codec.
 *
 * Holds the interface name and pointer, the last error with its optional
 * detail string, the init-time flags, a pointer to the encoder or decoder
 * configuration, and the algorithm's private storage.
 */
typedef struct aom_codec_ctx {
  const char *name;             /**< Printable interface name */
  aom_codec_iface_t *iface;     /**< Interface pointers */
  aom_codec_err_t err;          /**< Last returned error */
  const char *err_detail;       /**< Detailed info, if available */
  aom_codec_flags_t init_flags; /**< Flags passed at init time */
  union {
    /**< Decoder Configuration Pointer */
    const struct aom_codec_dec_cfg *dec;
    /**< Encoder Configuration Pointer */
    const struct aom_codec_enc_cfg *enc;
    /**< Untyped view of the same configuration pointer */
    const void *raw;
  } config;               /**< Configuration pointer aliasing union */
  aom_codec_priv_t *priv; /**< Algorithm private storage */
} aom_codec_ctx_t;

paramètre:

ctx : pointeur vers le contexte de cette instance.
img : données de l'image à encoder ; NULL demande de vider (flush) les tampons de l'encodeur.
pts : horodatage de présentation, exprimé en unités de la base de temps.
duration : durée d'affichage de l'image, en unités de la base de temps.
flags : drapeaux à utiliser pour l'encodage de cette image.

valeur de retour:

AOM_CODEC_OK: l'opération s'est terminée sans erreur.
AOM_CODEC_INCAPABLE: l'algorithme ne dispose pas de la capacité requise.
AOM_CODEC_INVALID_PARAM: Les paramètres fournis par l'application ne sont pas valides, le format d'image n'est pas pris en charge, etc.

Une fois la dernière image transmise à l'encodeur, il faut continuer à appeler cette fonction avec le paramètre img à NULL. Cela signale la fin du flux à l'encodeur et lui permet d'encoder les images encore en attente dans ses tampons. L'encodage est terminé lorsque, après un appel à aom_codec_encode(), aom_codec_get_cx_data() ne renvoie plus aucune donnée.

Cette fonction appelle principalement encoder_encode pour l'encodage.

/*
 * Validates the context and arguments, then forwards the frame to the
 * encode callback of the codec interface (ctx->iface->enc.encode).
 *
 * The checks run in order and the first failure decides the result:
 *   - NULL ctx, or a non-NULL img with zero duration -> AOM_CODEC_INVALID_PARAM
 *   - missing interface or private data              -> AOM_CODEC_ERROR
 *   - interface lacks AOM_CODEC_CAP_ENCODER          -> AOM_CODEC_INCAPABLE
 * A NULL img is accepted whatever the duration.
 */
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img,
                                 aom_codec_pts_t pts, unsigned long duration,
                                 aom_enc_frame_flags_t flags) {
  aom_codec_err_t res = AOM_CODEC_OK;

  if (!ctx || (img && !duration))
    res = AOM_CODEC_INVALID_PARAM;
  else if (!ctx->iface || !ctx->priv)
    res = AOM_CODEC_ERROR;
  else if (!(ctx->iface->caps & AOM_CODEC_CAP_ENCODER))
    res = AOM_CODEC_INCAPABLE;
  else {
    /* Execute in a normalized floating point environment, if the platform
     * requires it.
     */
    FLOATING_POINT_INIT
    /* The callback receives the algorithm-private data, not ctx itself. */
    res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration, flags);
    FLOATING_POINT_RESTORE
  }
  /* NOTE(review): the excerpt stops here; the function's return statement
   * and closing brace are not shown. */

La fonction get_alg_priv() permet d'obtenir, à partir de la structure aom_codec_ctx_t, le pointeur vers les données privées (membre priv, de type aom_codec_priv_t *), converti en aom_codec_alg_priv_t *.

/* Returns the context's private storage pointer, cast to the
 * algorithm-specific private type. ctx is dereferenced unconditionally, so
 * it must not be NULL. */
static aom_codec_alg_priv_t *get_alg_priv(aom_codec_ctx_t *ctx) {
  aom_codec_alg_priv_t *const alg_priv = (aom_codec_alg_priv_t *)ctx->priv;
  return alg_priv;
}

aom_codec_priv_t

Structure de données privées du codec.
Contient des données spécifiques à l'implémentation du codec. Cette structure est opaque pour l'application.