一、avcodec_send_frame()

libavcodec\avcodec.h

/**
 * Supply a raw video or audio frame to the encoder. Use avcodec_receive_packet()
 * to retrieve buffered output packets.
 *
 * @param avctx     codec context
 * @param[in] frame AVFrame containing the raw audio or video frame to be encoded.
 *                  Ownership of the frame remains with the caller, and the
 *                  encoder will not write to the frame. The encoder may create
 *                  a reference to the frame data (or copy it if the frame is
 *                  not reference-counted).
 *                  It can be NULL, in which case it is considered a flush
 *                  packet.  This signals the end of the stream. If the encoder
 *                  still has packets buffered, it will return them after this
 *                  call. Once flushing mode has been entered, additional flush
 *                  packets are ignored, and sending frames will return
 *                  AVERROR_EOF.
 *
 *                  For audio:
 *                  If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
 *                  can have any number of samples.
 *                  If it is not set, frame->nb_samples must be equal to
 *                  avctx->frame_size for all frames except the last.
 *                  The final frame may be smaller than avctx->frame_size.
 * @return 0 on success, otherwise negative error code:
 *      AVERROR(EAGAIN):   input is not accepted in the current state - user
 *                         must read output with avcodec_receive_packet() (once
 *                         all output is read, the packet should be resent, and
 *                         the call will not fail with EAGAIN).
 *      AVERROR_EOF:       the encoder has been flushed, and no new frames can
 *                         be sent to it
 *      AVERROR(EINVAL):   codec not opened, refcounted_frames not set, it is a
 *                         decoder, or requires flush
 *      AVERROR(ENOMEM):   failed to add packet to internal queue, or similar
 *      other errors: legitimate decoding errors
 */
int avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame);

libavcodec\encode.c

int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
        return AVERROR(EINVAL);

    if (avctx->internal->draining)
        return AVERROR_EOF;

    if (!frame) {
        avctx->internal->draining = 1;

        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            return 0;
    }

    if (avctx->codec->send_frame)
        return avctx->codec->send_frame(avctx, frame);

    // Emulation via old API. Do it here instead of avcodec_receive_packet, because:
    // 1. if the AVFrame is not refcounted, the copying will be much more
    //    expensive than copying the packet data
    // 2. assume few users use non-refcounted AVPackets, so usually no copy is
    //    needed

    if (avctx->internal->buffer_pkt_valid)
        return AVERROR(EAGAIN);

    return do_encode(avctx, frame, &(int){0});
}

从avcodec_send_frame()中可以看到，先判断AVCodec是否存在send_frame函数指针，如果存在就调用该函数，如果不存在则调用do_encode函数。本文以h264编码为例，如下：

AVCodec ff_libx264_encoder = {
    .name             = "libx264",
    .long_name        = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .type             = AVMEDIA_TYPE_VIDEO,
    .id               = AV_CODEC_ID_H264,
    .priv_data_size   = sizeof(X264Context),
    .init             = X264_init,
    .encode2          = X264_frame,
    .close            = X264_close,
    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS |
                        AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
    .priv_class       = &x264_class,
    .defaults         = x264_defaults,
    .init_static_data = X264_init_static,
    .caps_internal    = FF_CODEC_CAP_INIT_CLEANUP,
    .wrapper_name     = "libx264",
};

从ff_libx264_encoder定义发现，没有定义send_frame函数指针，所以调用do_encode。如下所示：

static int do_encode(AVCodecContext *avctx, const AVFrame *frame, int *got_packet)
{
    int ret;
    *got_packet = 0;

    av_packet_unref(avctx->internal->buffer_pkt);
    avctx->internal->buffer_pkt_valid = 0;

    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        ret = avcodec_encode_video2(avctx, avctx->internal->buffer_pkt,
                                    frame, got_packet);
    } else if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
        ret = avcodec_encode_audio2(avctx, avctx->internal->buffer_pkt,
                                    frame, got_packet);
    } else {
        ret = AVERROR(EINVAL);
    }

    if (ret >= 0 && *got_packet) {
        // Encoders must always return ref-counted buffers.
        // Side-data only packets have no data and can be not ref-counted.
        av_assert0(!avctx->internal->buffer_pkt->data || avctx->internal->buffer_pkt->buf);
        avctx->internal->buffer_pkt_valid = 1;
        ret = 0;
    } else {
        av_packet_unref(avctx->internal->buffer_pkt);
    }

    return ret;
}

do_encode()函数最终视频会调用avcodec_encode_video2()，音频会调用avcodec_encode_audio2()函数。

二、avcodec_receive_packet()

avcodec_receive_packet()申明在文件libavcodec\avcodec.h中，如下所示。

/**
 * Read encoded data from the encoder.
 *
 * @param avctx codec context
 * @param avpkt This will be set to a reference-counted packet allocated by the
 *              encoder. Note that the function will always call
 *              av_frame_unref(frame) before doing anything else.
 * @return 0 on success, otherwise negative error code:
 *      AVERROR(EAGAIN):   output is not available in the current state - user
 *                         must try to send input
 *      AVERROR_EOF:       the encoder has been fully flushed, and there will be
 *                         no more output packets
 *      AVERROR(EINVAL):   codec not opened, or it is an encoder
 *      other errors: legitimate decoding errors
 */
int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);

avcodec_receive_packet()定义在文件libavcodec\encode.c中，如下所示。

int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    av_packet_unref(avpkt);

    if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
        return AVERROR(EINVAL);

    if (avctx->codec->receive_packet) {
        if (avctx->internal->draining && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            return AVERROR_EOF;
        return avctx->codec->receive_packet(avctx, avpkt);
    }

    // Emulation via old API.

    if (!avctx->internal->buffer_pkt_valid) {
        int got_packet;
        int ret;
        if (!avctx->internal->draining)
            return AVERROR(EAGAIN);
        ret = do_encode(avctx, NULL, &got_packet);
        if (ret < 0)
            return ret;
        if (ret >= 0 && !got_packet)
            return AVERROR_EOF;
    }

    av_packet_move_ref(avpkt, avctx->internal->buffer_pkt);
    avctx->internal->buffer_pkt_valid = 0;
    return 0;
}

从avcodec_receive_packet()中可以看到，先判断AVCodec是否存在receive_packet函数指针，如果存在就调用该函数，如果不存在也是调用do_encode函数。h264编码中也会调用do_encode()函数。从前面分析得知，do_encode()函数最终视频会调用avcodec_encode_video2()，音频会调用avcodec_encode_audio2()函数。

三、avcodec_encode_video2()

/**
 * Encode a frame of video.
 *
 * Takes input raw video data from frame and writes the next output packet, if
 * available, to avpkt. The output packet does not necessarily contain data for
 * the most recent frame, as encoders can delay and reorder input frames
 * internally as needed.
 *
 * @param avctx     codec context
 * @param avpkt     output AVPacket.
 *                  The user can supply an output buffer by setting
 *                  avpkt->data and avpkt->size prior to calling the
 *                  function, but if the size of the user-provided data is not
 *                  large enough, encoding will fail. All other AVPacket fields
 *                  will be reset by the encoder using av_init_packet(). If
 *                  avpkt->data is NULL, the encoder will allocate it.
 *                  The encoder will set avpkt->size to the size of the
 *                  output packet. The returned data (if any) belongs to the
 *                  caller, he is responsible for freeing it.
 *
 *                  If this function fails or produces no output, avpkt will be
 *                  freed using av_packet_unref().
 * @param[in] frame AVFrame containing the raw video data to be encoded.
 *                  May be NULL when flushing an encoder that has the
 *                  AV_CODEC_CAP_DELAY capability set.
 * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the
 *                            output packet is non-empty, and to 0 if it is
 *                            empty. If the function returns an error, the
 *                            packet can be assumed to be invalid, and the
 *                            value of got_packet_ptr is undefined and should
 *                            not be used.
 * @return          0 on success, negative error code on failure
 *
 * @deprecated use avcodec_send_frame()/avcodec_receive_packet() instead
 */
attribute_deprecated
int avcodec_encode_video2(AVCodecContext *avctx, AVPacket *avpkt,
                          const AVFrame *frame, int *got_packet_ptr);

该函数每个参数的含义在注释里面已经写的很清楚了，在这里用中文简述一下：
@ avctx：编码器的AVCodecContext。
@ avpkt：编码输出的AVPacket。
@ frame：编码输入的AVFrame。
@ got_packet_ptr：成功编码一个AVPacket的时候设置为1。
avcodec_encode_video2()的定义位于libavcodec\utils.c，如下所示。

int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
                                              AVPacket *avpkt,
                                              const AVFrame *frame,
                                              int *got_packet_ptr)
{
    int ret;
    AVPacket user_pkt = *avpkt;
    int needs_realloc = !user_pkt.data;

    *got_packet_ptr = 0;

    if (!avctx->codec->encode2) {
        av_log(avctx, AV_LOG_ERROR, "This encoder requires using the avcodec_send_frame() API.\n");
        return AVERROR(ENOSYS);
    }

    if(CONFIG_FRAME_THREAD_ENCODER &&
       avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))
        return ff_thread_video_encode_frame(avctx, avpkt, frame, got_packet_ptr);

    if ((avctx->flags&AV_CODEC_FLAG_PASS1) && avctx->stats_out)
        avctx->stats_out[0] = '\0';

    if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
        av_packet_unref(avpkt);
        return 0;
    }

    if (av_image_check_size2(avctx->width, avctx->height, avctx->max_pixels, AV_PIX_FMT_NONE, 0, avctx))
        return AVERROR(EINVAL);

    if (frame && frame->format == AV_PIX_FMT_NONE)
        av_log(avctx, AV_LOG_WARNING, "AVFrame.format is not set\n");
    if (frame && (frame->width == 0 || frame->height == 0))
        av_log(avctx, AV_LOG_WARNING, "AVFrame.width or height is not set\n");

    av_assert0(avctx->codec->encode2);

    ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
    av_assert0(ret <= 0);

    emms_c();

    if (avpkt->data && avpkt->data == avctx->internal->byte_buffer) {
        needs_realloc = 0;
        if (user_pkt.data) {
            if (user_pkt.size >= avpkt->size) {
                memcpy(user_pkt.data, avpkt->data, avpkt->size);
            } else {
                av_log(avctx, AV_LOG_ERROR, "Provided packet is too small, needs to be %d\n", avpkt->size);
                avpkt->size = user_pkt.size;
                ret = -1;
            }
            avpkt->buf      = user_pkt.buf;
            avpkt->data     = user_pkt.data;
        } else if (!avpkt->buf) {
            ret = av_packet_make_refcounted(avpkt);
            if (ret < 0)
                return ret;
        }
    }

    if (!ret) {
        if (!*got_packet_ptr)
            avpkt->size = 0;
        else if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            avpkt->pts = avpkt->dts = frame->pts;

        if (needs_realloc && avpkt->data) {
            ret = av_buffer_realloc(&avpkt->buf, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
            if (ret >= 0)
                avpkt->data = avpkt->buf->data;
        }

        if (frame)
            avctx->frame_number++;
    }

    if (ret < 0 || !*got_packet_ptr)
        av_packet_unref(avpkt);

    return ret;
}

从函数的定义可以看出，avcodec_encode_video2()首先调用了av_image_check_size()检查设置的宽高参数是否合理，然后调用了AVCodec的encode2()调用具体的解码器。

3.1 av_image_check_size2()

int av_image_check_size2(unsigned int w, unsigned int h, int64_t max_pixels, enum AVPixelFormat pix_fmt, int log_offset, void *log_ctx)
{
    ImgUtils imgutils = {
        .class      = &imgutils_class,
        .log_offset = log_offset,
        .log_ctx    = log_ctx,
    };
    int64_t stride = av_image_get_linesize(pix_fmt, w, 0);
    if (stride <= 0)
        stride = 8LL*w;
    stride += 128*8;

    if ((int)w<=0 || (int)h<=0 || stride >= INT_MAX || stride*(uint64_t)(h+128) >= INT_MAX) {
        av_log(&imgutils, AV_LOG_ERROR, "Picture size %ux%u is invalid\n", w, h);
        return AVERROR(EINVAL);
    }

    if (max_pixels < INT64_MAX) {
        if (w*(int64_t)h > max_pixels) {
            av_log(&imgutils, AV_LOG_ERROR,
                    "Picture size %ux%u exceeds specified max pixel count %"PRId64", see the documentation if you wish to increase it\n",
                    w, h, max_pixels);
            return AVERROR(EINVAL);
        }
    }

    return 0;
}

从代码中可以看出，av_image_check_size()主要是要求图像宽高必须为正数，而且取值不能太大。

3.2 AVCodec->encode2()

从ff_libx264_encoder的定义可以看出，encode2()函数指向的是X264_frame()函数。

static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                      int *got_packet)
{
    X264Context *x4 = ctx->priv_data;
    x264_nal_t *nal;
    int nnal, i, ret;
    x264_picture_t pic_out = {0};
    int pict_type;
    int bit_depth;
    int64_t *out_opaque;
    AVFrameSideData *sd;

    x264_picture_init( &x4->pic );
    x4->pic.img.i_csp   = x4->params.i_csp;
#if X264_BUILD >= 153
    bit_depth = x4->params.i_bitdepth;
#else
    bit_depth = x264_bit_depth;
#endif
    if (bit_depth > 8)
        x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;
    x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);

    if (frame) {
        for (i = 0; i < x4->pic.img.i_plane; i++) {
            x4->pic.img.plane[i]    = frame->data[i];
            x4->pic.img.i_stride[i] = frame->linesize[i];
        }

        x4->pic.i_pts  = frame->pts;

        x4->reordered_opaque[x4->next_reordered_opaque] = frame->reordered_opaque;
        x4->pic.opaque = &x4->reordered_opaque[x4->next_reordered_opaque];
        x4->next_reordered_opaque++;
        x4->next_reordered_opaque %= x4->nb_reordered_opaque;

        switch (frame->pict_type) {
        case AV_PICTURE_TYPE_I:
            x4->pic.i_type = x4->forced_idr > 0 ? X264_TYPE_IDR
                                                : X264_TYPE_KEYFRAME;
            break;
        case AV_PICTURE_TYPE_P:
            x4->pic.i_type = X264_TYPE_P;
            break;
        case AV_PICTURE_TYPE_B:
            x4->pic.i_type = X264_TYPE_B;
            break;
        default:
            x4->pic.i_type = X264_TYPE_AUTO;
            break;
        }
        reconfig_encoder(ctx, frame);

        if (x4->a53_cc) {
            void *sei_data;
            size_t sei_size;

            ret = ff_alloc_a53_sei(frame, 0, &sei_data, &sei_size);
            if (ret < 0) {
                av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
            } else if (sei_data) {
                x4->pic.extra_sei.payloads = av_mallocz(sizeof(x4->pic.extra_sei.payloads[0]));
                if (x4->pic.extra_sei.payloads == NULL) {
                    av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
                    av_free(sei_data);
                } else {
                    x4->pic.extra_sei.sei_free = av_free;

                    x4->pic.extra_sei.payloads[0].payload_size = sei_size;
                    x4->pic.extra_sei.payloads[0].payload = sei_data;
                    x4->pic.extra_sei.num_payloads = 1;
                    x4->pic.extra_sei.payloads[0].payload_type = 4;
                }
            }
        }

        sd = av_frame_get_side_data(frame, AV_FRAME_DATA_REGIONS_OF_INTEREST);
        if (sd) {
            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
                av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
            } else {
                if (frame->interlaced_frame == 0) {
                    int mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
                    int mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
                    int qp_range = 51 + 6 * (bit_depth - 8);
                    int nb_rois;
                    const AVRegionOfInterest *roi;
                    uint32_t roi_size;
                    float *qoffsets;

                    roi = (const AVRegionOfInterest*)sd->data;
                    roi_size = roi->self_size;
                    if (!roi_size || sd->size % roi_size != 0) {
                        av_log(ctx, AV_LOG_ERROR, "Invalid AVRegionOfInterest.self_size.\n");
                        return AVERROR(EINVAL);
                    }
                    nb_rois = sd->size / roi_size;

                    qoffsets = av_mallocz_array(mbx * mby, sizeof(*qoffsets));
                    if (!qoffsets)
                        return AVERROR(ENOMEM);

                    // This list must be iterated in reverse because the first
                    // region in the list applies when regions overlap.
                    for (int i = nb_rois - 1; i >= 0; i--) {
                        int startx, endx, starty, endy;
                        float qoffset;

                        roi = (const AVRegionOfInterest*)(sd->data + roi_size * i);

                        starty = FFMIN(mby, roi->top / MB_SIZE);
                        endy   = FFMIN(mby, (roi->bottom + MB_SIZE - 1)/ MB_SIZE);
                        startx = FFMIN(mbx, roi->left / MB_SIZE);
                        endx   = FFMIN(mbx, (roi->right + MB_SIZE - 1)/ MB_SIZE);

                        if (roi->qoffset.den == 0) {
                            av_free(qoffsets);
                            av_log(ctx, AV_LOG_ERROR, "AVRegionOfInterest.qoffset.den must not be zero.\n");
                            return AVERROR(EINVAL);
                        }
                        qoffset = roi->qoffset.num * 1.0f / roi->qoffset.den;
                        qoffset = av_clipf(qoffset * qp_range, -qp_range, +qp_range);

                        for (int y = starty; y < endy; y++) {
                            for (int x = startx; x < endx; x++) {
                                qoffsets[x + y*mbx] = qoffset;
                            }
                        }
                    }

                    x4->pic.prop.quant_offsets = qoffsets;
                    x4->pic.prop.quant_offsets_free = av_free;
                } else {
                    av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
                }
            }
        }
    }

    do {
        if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
            return AVERROR_EXTERNAL;

        ret = encode_nals(ctx, pkt, nal, nnal);
        if (ret < 0)
            return ret;
    } while (!ret && !frame && x264_encoder_delayed_frames(x4->enc));

    pkt->pts = pic_out.i_pts;
    pkt->dts = pic_out.i_dts;

    out_opaque = pic_out.opaque;
    if (out_opaque >= x4->reordered_opaque &&
        out_opaque < &x4->reordered_opaque[x4->nb_reordered_opaque]) {
        ctx->reordered_opaque = *out_opaque;
    } else {
        // Unexpected opaque pointer on picture output
        ctx->reordered_opaque = 0;
    }

    switch (pic_out.i_type) {
    case X264_TYPE_IDR:
    case X264_TYPE_I:
        pict_type = AV_PICTURE_TYPE_I;
        break;
    case X264_TYPE_P:
        pict_type = AV_PICTURE_TYPE_P;
        break;
    case X264_TYPE_B:
    case X264_TYPE_BREF:
        pict_type = AV_PICTURE_TYPE_B;
        break;
    default:
        pict_type = AV_PICTURE_TYPE_NONE;
    }
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    ctx->coded_frame->pict_type = pict_type;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    pkt->flags |= AV_PKT_FLAG_KEY*pic_out.b_keyframe;
    if (ret) {
        ff_side_data_set_encoder_stats(pkt, (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);

#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
        ctx->coded_frame->quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
    }

    *got_packet = ret;
    return 0;
}

有关X264编码的代码在以后分析X264的时候再进行详细分析。在这里我们可以我们可以简单看出该函数中有一个do while循环，其中调用了x264_encoder_encode()完成了编码的工作。

ffmpeg源码分析：avcodec_send_frame和avcodec_receive_packet（最终调用avcodec_encode_video2()）

一、avcodec_send_frame()

二、avcodec_receive_packet()

三、avcodec_encode_video2()

3.1 av_image_check_size2()

3.2 AVCodec->encode2()

猜你喜欢