福州大学《嵌入式系统综合设计》 实验十:FFMPEG视频解码

一、实验目的

在掌握FFMPEG编码的基础上进一步掌握FFMPEG视频解码的流程,包括开发主机环境与云平台的配置,视频解码程序的理解,代码的编译、运行等。重点掌握FFMPEG解码过程中常用的API函数接口,常用的结构体,解码的基本流程,以及关键步骤。

二、实验内容

搭建实验开发环境,编译并运行解码程序,对编码后的视频码流进行解码。

三、开发环境

开发主机:Ubuntu 22.04 LTS

硬件:算能SE5

四、实验器材

开发主机 + 云平台(或SE5硬件)

五、实验过程与结论

5.1 FFMPEG解码原理与流程

FFMPEG同样提供了接口用于对视频压缩文件进行解码,支持H264、H265、MJPEG等视频文件解码,支持解码输出YUV文件。FFMPEG不仅可以支持对视频压缩文件进行解码,同样支持对音频文件进行解码,比如将音频AAC帧通过解码器解码为PCM数据。

FFMPEG解码的基本流程在初始化部分与FFMPEG编码类似,但是在解码过程中主要调用了av_read_frame和avcodec_decode_video2函数进行解码。其中av_read_frame循环地从缓存中读取一帧的视频帧数据,avcodec_decode_video2函数则负责解码一帧压缩数据,标准的FFMPEG解码的基本流程如下图所示:

需要注意的是,由于算能的FFMPEG接口采用硬件进行加速,在PCIE模式下需要指定加速卡,以及内存同步等操作,参考如下代码:

#ifdef BM_PCIE_MODE
    // PCIE mode: bind decoding to a specific Sophon accelerator card and
    // control whether decoded frames are copied back to host memory.
    av_dict_set_int(&opts, "zero_copy", pcie_no_copyback, 0);
    av_dict_set_int(&opts, "sophon_idx", sophon_idx, 0);
#endif
    // output_format 101 appears to select a vendor-specific output format for
    // the bm decoder -- NOTE(review): confirm the meaning of mode 101 and of
    // flag value 18 against the Sophon FFmpeg fork documentation.
    if(output_format_mode == 101)
        av_dict_set_int(&opts, "output_format", output_format_mode, 18);

// Extra frame buffers keep decoded frames alive longer (e.g. for reordering).
//if(extra_frame_buffer_num > 5) 
    av_dict_set_int(&opts, "extra_frame_buffer_num", extra_frame_buffer_num, 0);  
//av_dict_set_int(&opts, "extra_frame_buffer_num", 1, 0);
5.2 FFMPEG解码关键函数

主函数:

    // Decode driver: open the output YUV file, initialize the decoder through
    // the VideoDec_FFMPEG wrapper, then loop read -> decode -> convert -> write.
    FILE *fp_yuv = fopen(output_file.data(), "wb+");
    av_log_set_level(AV_LOG_DEBUG); // set debug level
    // openDec(filename, codec_name_flag, coder_name, output_format_mode,
    //         extra_frame_buffer_num, sophon_idx, pcie_no_copyback)
    reader.openDec(input_file.data(), 1, "h264_bm", 100, 60, 0, 0);

    pFormatCtx = reader.ifmt_ctx;

    // Locate the first video stream in the container.
    videoindex = -1;
    for (i = 0; i < pFormatCtx->nb_streams; i++) {
        if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            videoindex = i;
            break;
        }
    }
    if (videoindex == -1)
    {
        printf("Didn't find a video stream.\n");
        return -1;
    }
    cout << "video index " << videoindex << endl;

    pCodecCtx = reader.video_dec_ctx;
    pCodec = reader.decoder;

    // Allocate the decode frame, the converted output frame, and a YUV420P
    // buffer sized for the stream resolution; wire the buffer into pFrameYUV.
    pFrame = av_frame_alloc();
    pFrameYUV = av_frame_alloc();
    out_buffer = (uint8_t *)av_malloc(avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height));
    avpicture_fill((AVPicture *)pFrameYUV, out_buffer, AV_PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
    packet = (AVPacket *)av_malloc(sizeof(AVPacket));

    av_dump_format(pFormatCtx, 0, input_file.data(), 0);

    // Converter from the decoder's native pixel format to planar YUV420P.
    img_convert_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);
    long long framecount = 0;

    while (av_read_frame(pFormatCtx, packet) >= 0)
    {    // read one frame of compressed data
        if (packet->stream_index == videoindex)
        {
            // decode one frame of compressed data
            ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet); 
            if (ret < 0)
            {
                printf("Decode Error.\n");
                return -1;
            }
            if (got_picture)
            {
                // Convert to YUV420P and append the three planes to the file.
                sws_scale(img_convert_ctx, (const uint8_t *const *)pFrame->data, pFrame->linesize, 0, pCodecCtx->height,pFrameYUV->data, pFrameYUV->linesize);
                y_size = pCodecCtx->width * pCodecCtx->height;
                fwrite(pFrameYUV->data[0], 1, y_size, fp_yuv);     // Y
                fwrite(pFrameYUV->data[1], 1, y_size / 4, fp_yuv); // U
                fwrite(pFrameYUV->data[2], 1, y_size / 4, fp_yuv); // V
                printf("\rfinish %lld [%c].", ++framecount, progressbar_icon[framecount % 12]);
                fflush(stdout);
            }
        }
        av_free_packet(packet);
    }
    // flush decoder
    /* When the av_read_frame() loop exits, the decoder may still hold a few
       buffered frames. "flush_decoder" drains them by calling
       avcodec_decode_video2() repeatedly without feeding new AVPackets. */
    while (1)
    {
        ret = avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, packet);
        if (ret < 0)
            break;
        if (!got_picture)
            break;
        sws_scale(img_convert_ctx, (const uint8_t *const *)pFrame->data, pFrame->linesize, 0, pCodecCtx->height,
            pFrameYUV->data, pFrameYUV->linesize);
        int y_size = pCodecCtx->width * pCodecCtx->height;
        // write the flushed frame to the output file
        fwrite(pFrameYUV->data[0], 1, y_size, fp_yuv);     // Y
        fwrite(pFrameYUV->data[1], 1, y_size / 4, fp_yuv); // U
        fwrite(pFrameYUV->data[2], 1, y_size / 4, fp_yuv); // V

        printf("\rfinish %lld [%c].",++framecount, progressbar_icon[framecount % 12]);
        fflush(stdout);
    }

    sws_freeContext(img_convert_ctx);

    // close the file and release memory
    fclose(fp_yuv);
    cout << "Total Decode " << framecount << " frames" << endl;
    av_frame_free(&pFrameYUV);
    av_frame_free(&pFrame);

通过上面的代码可以发现,这里调用了openDec开启解码器,随后循环读取并解码视频帧;此外VideoDec_FFMPEG类还提供了grabFrame接口,用于直接获取一帧解码后的视频帧。openDec与grabFrame这两个接口都由VideoDec_FFMPEG类来提供。下面我们介绍一下VideoDec_FFMPEG类的写法,也是本实例的核心。

VideoDec_FFMPEG类

初始化构造函数:

// Construct the decoder wrapper in a fully reset state. All FFmpeg handles
// are created later in openDec(); only the reusable output frame and the
// packet holder are prepared here.
VideoDec_FFMPEG::VideoDec_FFMPEG()
{
    // Demuxer / codec handles: filled in by openDec().
    ifmt_ctx      = NULL;
    video_dec_ctx = NULL;
    video_dec_par = NULL;
    decoder       = NULL;

    // Stream geometry is unknown until a file is opened.
    width   = 0;
    height  = 0;
    pix_fmt = 0;
    video_stream_idx = -1;

    // Request reference-counted frames from the decoder.
    refcount = 1;

    // Empty packet holder plus the frame that grabFrame() refills each call.
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;
    frame = av_frame_alloc();
}

   析构函数:

// Destructor: release every FFmpeg resource owned by the wrapper (demuxer,
// codec context, frame) via closeDec(), then log the teardown.
VideoDec_FFMPEG::~VideoDec_FFMPEG()
{
    closeDec();
    printf("#VideoDec_FFMPEG exit \n");
}

 关键函数openDec: 

int VideoDec_FFMPEG::openDec(const char* filename,int codec_name_flag,
                             const char *coder_name,int output_format_mode,
                             int extra_frame_buffer_num,int sophon_idx, int pcie_no_copyback)//文件的打开,解码器的初始化
{
    int ret = 0;
    AVDictionary *dict = NULL;
    av_dict_set(&dict, "rtsp_flags", "prefer_tcp", 0);
    //打开媒体流
    ret = avformat_open_input(&ifmt_ctx, filename, NULL, &dict);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
        return ret;
}
    //获取媒体信息
    ret = avformat_find_stream_info(ifmt_ctx, NULL);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
        return ret;
    }
    //打开编码器(二次封装函数)
    ret = openCodecContext(&video_stream_idx, &video_dec_ctx, ifmt_ctx,             AVMEDIA_TYPE_VIDEO,codec_name_flag, coder_name, output_format_mode, extra_frame_buffer_num);
    if (ret >= 0) {
        width   = video_dec_ctx->width;
        height  = video_dec_ctx->height;
        pix_fmt = video_dec_ctx->pix_fmt;
    }
    av_log(video_dec_ctx, AV_LOG_INFO,
           "openDec video_stream_idx = %d, pix_fmt = %d\n",video_stream_idx, pix_fmt);
    av_dict_free(&dict);
    return ret;
}

从上面的代码我们可以发现,这里通过 avformat_open_input打开媒体流,通过avformat_find_stream_info函数获取了媒体信息,并且调用了openCodecContext打开解码器。这里openCodecContext是二次封装函数,实现方法如下:

// Locate the best stream of the requested type, find a (Bitmain) decoder for
// it, allocate and parameterize a codec context, and open the decoder.
//
// @param stream_idx  [out] index of the selected stream
// @param dec_ctx     [out] newly allocated and opened codec context
// @param fmt_ctx     demuxer context to search
// @param type        media type to select (AVMEDIA_TYPE_VIDEO here)
// @param codec_name_flag / coder_name  non-zero flag selects decoder by name
// @param output_format_mode            101 enables a special output format
// @param extra_frame_buffer_num        extra decoder frame buffers
// @param sophon_idx / pcie_no_copyback PCIE-mode options (see BM_PCIE_MODE)
// @return 0 on success, negative AVERROR on failure
int VideoDec_FFMPEG::openCodecContext(int *stream_idx,AVCodecContext **dec_ctx, AVFormatContext *fmt_ctx, enum AVMediaType type,int codec_name_flag, const char *coder_name, int output_format_mode,int extra_frame_buffer_num, int sophon_idx, int pcie_no_copyback)
{
    int ret, stream_index;
    AVStream *st;
    AVDictionary *opts = NULL;
    ret = av_find_best_stream(fmt_ctx, type, -1, -1, NULL, 0);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not find %s stream\n", av_get_media_type_string(type));
        return ret;
    }
    stream_index = ret;
    st = fmt_ctx->streams[stream_index];
    /* find decoder for the stream */
    if(codec_name_flag && coder_name)
        decoder = findBmDecoder((AVCodecID)0,coder_name,codec_name_flag, AVMEDIA_TYPE_VIDEO);
    else
        decoder = findBmDecoder(st->codecpar->codec_id);
    if (!decoder) {
        av_log(NULL, AV_LOG_FATAL,"Failed to find %s codec\n",
               av_get_media_type_string(type));
        return AVERROR(EINVAL);
    }
    /* Allocate a codec context for the decoder */
    *dec_ctx = avcodec_alloc_context3(decoder);
    if (!*dec_ctx) {
        av_log(NULL, AV_LOG_FATAL, "Failed to allocate the %s codec context\n",
        av_get_media_type_string(type));
        return AVERROR(ENOMEM);
    }
    /* Copy codec parameters from input stream to output codec context */
    ret = avcodec_parameters_to_context(*dec_ctx, st->codecpar);
    if (ret < 0) {
        av_log(NULL, AV_LOG_FATAL, "Failed to copy %s codec parameters to decoder context\n", av_get_media_type_string(type));
        return ret;
    }
    video_dec_par = st->codecpar;
    /* Init the decoders, with or without reference counting */
    av_dict_set(&opts, "refcounted_frames", refcount ? "1" : "0", 0);
    if(output_format_mode == 101)
        av_dict_set_int(&opts, "output_format", output_format_mode, 18);
    av_dict_set_int(&opts, "extra_frame_buffer_num", extra_frame_buffer_num, 0); 
    // fix: open with the decoder we actually found; the original passed a
    // never-assigned local `AVCodec *dec = NULL` here.
    ret = avcodec_open2(*dec_ctx, decoder, &opts);
    if (ret < 0) {
        av_log(NULL, AV_LOG_FATAL, "Failed to open %s codec\n",
               av_get_media_type_string(type));
        av_dict_free(&opts); // fix: options dict was leaked on this error path
        return ret;
    }
    *stream_idx = stream_index;

    av_dict_free(&opts);

    return 0;
}

从上面的代码,我们可以发现,openCodecContext打开解码器过程中主要用到了findBmDecoder查找解码器(二次封装函数)、avcodec_alloc_context3分配解码context、avcodec_parameters_to_context设置解码器参数,以及利用av_dict_set_int函数进行一些参数设置,最后通过avcodec_open2打开解码器。

关键函数grabFrame:解码视频帧

// Pull packets from the demuxer until one video frame is decoded.
// Returns the internally owned AVFrame (valid until the next call),
// or NULL on end-of-stream / fatal read error.
AVFrame * VideoDec_FFMPEG::grabFrame()
{
    int ret = 0;
    int got_frame = 0;
    struct timeval tv1, tv2;
    gettimeofday(&tv1, NULL);
    while (1) {
        av_packet_unref(&pkt);
        ret = av_read_frame(ifmt_ctx, &pkt);
        if (ret < 0) {
            if (ret == AVERROR(EAGAIN)) {
                // Live sources may momentarily have no data: sleep 10 ms and
                // retry, giving up after 60 seconds without progress.
                gettimeofday(&tv2, NULL);
                if(((tv2.tv_sec - tv1.tv_sec) * 1000 + (tv2.tv_usec - tv1.tv_usec) / 1000) > 1000*60) {
                    av_log(video_dec_ctx, AV_LOG_WARNING, "av_read_frame failed ret(%d) retry time >60s.\n", ret);
                    break;
                }
                usleep(10*1000);
                continue;
            }
            av_log(video_dec_ctx, AV_LOG_ERROR, "av_read_frame ret(%d) maybe eof...\n", ret);
            return NULL; // TODO
        }
        // Skip packets that do not belong to the selected video stream.
        if (pkt.stream_index != video_stream_idx) {
            continue;
        }
        if (!frame) {
            av_log(video_dec_ctx, AV_LOG_ERROR, "Could not allocate frame\n");
            return NULL;
        }
        if (refcount) {
            // Drop our reference to the previous frame before reuse.
            av_frame_unref(frame);
        }
        gettimeofday(&tv1, NULL);

        ret = avcodec_decode_video2(video_dec_ctx, frame, &got_frame, &pkt);
        if (ret < 0) {
            av_log(video_dec_ctx, AV_LOG_ERROR, "Error decoding video frame (%d)\n",  ret);
            continue; // TODO
        }
        if (!got_frame) {
            continue; // decoder needs more input before emitting a frame
        }
        width   = video_dec_ctx->width;
        height  = video_dec_ctx->height;
        pix_fmt = video_dec_ctx->pix_fmt;
        // Reject mid-stream geometry / pixel-format changes.
        if (frame->width!= width||frame->height != height || frame->format != pix_fmt){
            av_log(video_dec_ctx, AV_LOG_ERROR,
                   "Error: Width, height and pixel format have to be "
                   "constant in a rawvideo file, but the width, height or "
                   "pixel format of the input video changed:\n"
                   "old: width = %d, height = %d, format = %s\n"
                   "new: width = %d, height = %d, format = %s\n",
                   width, height, av_get_pix_fmt_name((AVPixelFormat)pix_fmt),
                   frame->width, frame->height,
                   av_get_pix_fmt_name((AVPixelFormat)frame->format));
            continue;
        }
        break;
    }
    return frame;
} // fix: closing brace was missing from the original listing

最后使用完毕后关闭解码器:

// Release every FFmpeg resource owned by the wrapper. Each FFmpeg helper
// below takes the pointer by address and resets it to NULL itself, so the
// method is safe to call more than once.
void VideoDec_FFMPEG::closeDec()
{
    if (video_dec_ctx)
        avcodec_free_context(&video_dec_ctx);

    if (ifmt_ctx)
        avformat_close_input(&ifmt_ctx);

    if (frame)
        av_frame_free(&frame);
}
5.3 解码实验过程

生成可执行文件:

按照上述实验步骤,生成可执行文件并上传到算能嵌入式平台或者云平台中,具体操作不在赘述。此时test文件夹内的文件如图所示。

root@d11ae417e206:/tmp/test# ls

ffmpeg_decoder  test.h264

这里的test.h264可以用实验8中生成的编码文件。

给可执行文件赋权限并执行。

root@d11ae417e206:/tmp/test# chmod 777 ffmpeg_decoder

 运行指令:

生成并上传编译文件后,根据如下指令在目标开发机终端运行,其中具体的指令参数设置将在下面详细介绍。

./ffmpeg_decoder  test.h264  out.yuv

运行结果如下:

其中运行结果界面中显示的参数stream表示输入的码流文件,bm decoder id表示使用的解码器名称,sophon device表示PCIE模式下使用的sophon芯片序号,bm output format表示输出数据的格式,mode bitstream表示比特流模式,frame delay表示解码器延迟帧数,pix_fmt表示解码器支持的像素格式。关键部分见下绿色字体部分:

root@ab162899a93b:/tmp/tmp6l8uq_dw# ./ffmpeg_decoder test111.h264 out.yuv

[NULL @ 0x449a10] Opening 'test111.h264' for reading

[file @ 0x44a240] Setting default whitelist 'file,crypto'

[h264 @ 0x449a10] Format h264 probed with size=2048 and score=51

[h264 @ 0x449a10] Before avformat_find_stream_info() pos: 0 bytes read:32768 seeks:0 nb_streams:1

[AVBSFContext @ 0x4521c0] nal_unit_type: 7(SPS), nal_ref_idc: 3

[AVBSFContext @ 0x4521c0] nal_unit_type: 8(PPS), nal_ref_idc: 3

[AVBSFContext @ 0x4521c0] nal_unit_type: 5(IDR), nal_ref_idc: 3

[h264 @ 0x44acc0] nal_unit_type: 7(SPS), nal_ref_idc: 3

[h264 @ 0x44acc0] nal_unit_type: 8(PPS), nal_ref_idc: 3

[h264 @ 0x44acc0] nal_unit_type: 5(IDR), nal_ref_idc: 3

[h264 @ 0x44acc0] Format yuv420p chosen by get_format().

[h264 @ 0x44acc0] Reinit context to 1920x1088, pix_fmt: yuv420p

[h264 @ 0x44acc0] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 2

[h264 @ 0x44acc0] no picture

[h264 @ 0x44acc0] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 1

[h264 @ 0x44acc0] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 1

[h264 @ 0x44acc0] Increasing reorder buffer to 2

[h264 @ 0x44acc0] no picture ooo

[h264 @ 0x44acc0] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 0

[h264 @ 0x44acc0] Increasing reorder buffer to 3

[h264 @ 0x44acc0] no picture ooo

[h264 @ 0x44acc0] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 0

[h264 @ 0x44acc0] no picture ooo

[h264 @ 0x44acc0] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 1

[h264 @ 0x44acc0] no picture

[h264 @ 0x44acc0] nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 0

[h264 @ 0x449a10] After avformat_find_stream_info() pos: 520810 bytes read:520810 seeks:0 frames:101

[AVBSFContext @ 0x4521c0] The input looks like it is Annex B already

[h264_bm @ 0x44c170] Format nv12 chosen by get_format().

[h264_bm @ 0x44c170] ff_get_format: nv12.

[h264_bm @ 0x44c170] bmctx->hw_accel=0

[h264_bm @ 0x44c170] bm decoder id: 0

[h264_bm @ 0x44c170] bm output format: 0

[h264_bm @ 0x44c170] mode bitstream: 2, frame delay: -1

BMvidDecCreateW5 board id 0 coreid 0

libbmvideo.so addr : /system/lib/libbmvideo.so, name_len: 12

vpu firmware addr: /system/lib/vpu_firmware/chagall_dec.bin

VERSION=0, REVISION=213135

[h264_bm @ 0x44c170] perf: 0

[h264_bm @ 0x44c170] init options: mode, 2, frame delay, -1, output format, 0, extra frame buffer number: 5, extra_data_flag: 1

[h264_bm @ 0x44c170] openDec video_stream_idx = 0, pix_fmt = 23

video index 0

Input #0, h264, from 'test111.h264':

Duration: N/A, bitrate: N/A

Stream #0:0, 101, 1/1200000: Video: h264 (High), 1 reference frame, nv12(progressive, left), 1920x1080 (1920x1088), 0/1, 25 fps, 25 tbr, 1200k tbn, 50 tbc

finish 96 [|].[h264_bm @ 0x44c170] flush all frame in the decoder frame buffer

may be endof.. please check it.............

may be endof.. please check it.............

may be endof.. please check it.............

may be endof.. please check it.............

finish 101 [/].Total Decode 101 frames

[AVIOContext @ 0x42f6d0] Statistics: 520810 bytes read, 0 seeks

#VideoDec_FFMPEG exit

root@ab162899a93b:/tmp/tmp6l8uq_dw#

猜你喜欢

转载自blog.csdn.net/m0_52537869/article/details/134542722
今日推荐