FFmpeg 封装、解封装及解码的流程简介

背景
流程简介

1、解封装
2、解码
3、例子1.解封装解码流程
4、封装
5、例子2.转封装流程

背景

在工作过程中，因为我们项目的播放器是基于FFmpeg，所以经常也涉及到FFmpeg的使用、扩展及调优。但从未好好对该模块进行过一些文档总结，所以想着先把FFmpeg基本的API调用流程简单描述，以作参考。
本文描述解封装->解码调用流程，以及一个转换封装格式的流程（即解封装->重新封装），以达到了解FFmpeg基本API调用的目的。
FFmpeg工程源码中有很多example，我们也可以参考学习。

流程简介

需要使用的模块：
1、libavformat：用于各种音视频封装格式的生成和解析，包括获取解码所需信息以生成解码上下文结构和读取音视频帧等功能，包含demuxers和muxer库；
2、libavcodec：用于各种类型声音/图像编解码；
3、libavutil：包含一些公共的工具函数；
无论是解封装、解码还是封装，在准备阶段均需要先调用 av_register_all() 接口，将相关的demuxer/muxer及codec注册进来（注：自FFmpeg 4.0起该接口已被废弃，新版本中无需再调用）。
如果涉及到网络相关操作，还需要调用avformat_network_init();注册一些网络协议相关的东西。

1、解封装

步骤如下：
1、注册相关模块（av_register_all; avformat_network_init）
2、打开文件、获取封装信息上下文AVFormatContext（avformat_open_input）
3、获取媒体文件音视频信息，这一步会将AVFormatContext内部变量填充（avformat_find_stream_info）
4、获取音视频流ID。一般有两种方法：1）遍历AVFormatContext内部所有的stream，如果stream的codec_type对应为audio/video，则记录当前stream的ID；2）FFmpeg提供av_find_best_stream接口，可以直接获取相应类型（audio or video）的流ID
5、获取流的每一帧数据（av_read_frame）
6、关闭文件

2、解码

解码在解封装的基础上，将每一帧数据进行解码。步骤如下：
1、申请解码器上下文AVCodecContext（avcodec_alloc_context3）
2、初始化AVCodecContext参数，可以使用将解封装得到的流的解码器参数设置进来（avcodec_parameters_to_context）
3、打开解码器（avcodec_open2）
4、解码每一帧数据。这里有两种方法：1）老的FFmpeg接口中，对音视频解码器需要调用不同的解码接口，如音频avcodec_decode_audio4，视频avcodec_decode_video2；2）新的FFmpeg接口中，只需要将解封装获取的帧传递给解码器（avcodec_send_packet），再接收解码后的数据即可（avcodec_receive_frame）
5、关闭文件和解码器

3、例子1.解封装解码流程

例子只涉及到音频操作，视频的操作流程也一样。我找了一个左右声道音乐不一样的MP3文件，代码将其解封装并解码为PCM数据，并将左右声道数据分别保存到两个文件中，这样可以明显听到两个不同的音乐。
当然，这个代码只是简单的根据上面解封装解码流程完成的粗糙示例代码，缺少应有的判空判错的必要流程。

/**
 * 最简单的基于FFmpeg的解码器测试代码
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <android/log.h> 
#include <unistd.h>

#define LOGI(...)  __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...)  __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
#define LOG_TAG "FFmpeg-test"


#ifdef __cplusplus
extern "C"
{
#endif
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswresample/swresample.h>
#ifdef __cplusplus
};
#endif


int main(int argc, char* argv[]){
    //init
    av_register_all();
    avformat_network_init();
    //open
    AVFormatContext *mFormatContext = NULL;
    avformat_open_input(&mFormatContext, argv[1],NULL,NULL);
    //get info
    avformat_find_stream_info(mFormatContext,NULL);
    int audio_idx = av_find_best_stream(mFormatContext,AVMEDIA_TYPE_AUDIO,-1,-1,NULL,0);
    LOGI("[OPEN] file =%s [AUDIO:%d]\n",argv[1],audio_idx);
    FILE *audio_dst_file1 = fopen("/data/1.pcm", "wb");
    FILE *audio_dst_file2 = fopen("/data/2.pcm", "wb");
    //get decode
    AVCodec *pAudioCodec = avcodec_find_decoder(mFormatContext->streams[audio_idx]->codecpar->codec_id);
    AVCodecContext *pACodecCxt = avcodec_alloc_context3(pAudioCodec);
    avcodec_parameters_to_context(pACodecCxt, mFormatContext->streams[audio_idx]->codecpar);
    avcodec_open2(pACodecCxt,pAudioCodec,NULL);

#if 0    //use old mode
    //demux+decode
    AVPacket *mPacket = av_packet_alloc();
    AVFrame *mFrame = av_frame_alloc();
    int got = -1;
    size_t size = -1;
    int i = 0;
    while(av_read_frame(mFormatContext,mPacket) == 0){//demux
         if(mPacket->stream_index == audio_idx){
            LOGI("[AUDIO] size =%d pts=%lld flag=%d\n",mPacket->size,mPacket->pts,mPacket->flags);
            got = 0;
            avcodec_decode_audio4(pACodecCxt,mFrame,&got,mPacket);//decode
            if(got > 0){
                AVSampleFormat sf = AVSampleFormat(mFrame->format);
                size = mFrame->nb_samples * av_get_bytes_per_sample(sf);
                LOGI("[AUDIO] got one frame size[%d]  format[%d] \n",size, mFrame->format);
                if(av_sample_fmt_is_planar(sf)){
                    if(mFrame->extended_data[0] != NULL){
                        fwrite(mFrame->extended_data[0], 1, size, audio_dst_file1);
                    }
                    if(mFrame->extended_data[1] != NULL){
                        fwrite(mFrame->extended_data[1], 1, size, audio_dst_file2);
                    }
                }else{
                    if((mFrame->data[0] + i) != NULL){
                        if(i%2 == 0) fwrite((mFrame->data[0] + i), 1, size, audio_dst_file1);  
                        else fwrite((mFrame->data[0] + i), 1, size, audio_dst_file2); 
                    }
                }
            }
            av_frame_unref(mFrame);
        }
        av_packet_unref(mPacket);
    }
#else //use new mode
    //demux+decode
    AVPacket *mPacket = av_packet_alloc();
    AVFrame *mFrame = av_frame_alloc();
    int ret = -1;
    size_t size = -1;
    int i = 0;
    while(av_read_frame(mFormatContext,mPacket) == 0){//demux
         if(mPacket->stream_index == audio_idx){
            LOGI("[AUDIO] size =%d pts=%lld flag=%d\n",mPacket->size,mPacket->pts,mPacket->flags);
            ret = avcodec_send_packet(pACodecCxt, mPacket);
            if(ret == 0){
                
                while(avcodec_receive_frame(pACodecCxt,mFrame) == 0){//decode
                    AVSampleFormat sf = AVSampleFormat(mFrame->format);
                    size = mFrame->nb_samples * av_get_bytes_per_sample(sf);
                    LOGI("[AUDIO] got one frame size[%d]  format[%d] \n",size, mFrame->format);
                    if(av_sample_fmt_is_planar(sf)){
                        if(mFrame->extended_data[0] != NULL){
                            fwrite(mFrame->extended_data[0], 1, size, audio_dst_file1);
                        }
                        if(mFrame->extended_data[1] != NULL){
                            fwrite(mFrame->extended_data[1], 1, size, audio_dst_file2);
                        }
                    }else{
                        if((mFrame->data[0] + i) != NULL){
                            if(i%2 == 0) fwrite((mFrame->data[0] + i), 1, size, audio_dst_file1);  
                            else fwrite((mFrame->data[0] + i), 1, size, audio_dst_file2); 
                        }
                    }
                    
                    av_frame_unref(mFrame);
                }
                
            }
        }
        av_packet_unref(mPacket);
    }

#endif
    //close
    av_packet_free(&mPacket);
    av_frame_free(&mFrame);
    avcodec_close(pACodecCxt);
    avformat_close_input(&mFormatContext);
    fclose(audio_dst_file1);
    fclose(audio_dst_file2);
    
    return 0;
}

4、封装

步骤如下：
1、注册相关模块（av_register_all; avformat_network_init）
2、根据即将输出的文件名、获取封装信息上下文AVFormatContext（avformat_alloc_output_context2）
3、打开输出文件IO（avio_open）
4、添加音视频流（avformat_new_stream）
5、封装文件头信息（avformat_write_header）
6、向文件中写入数据包，如果包含视频、音频等多个码流的数据包，则按照时间戳大小交织写入（av_interleaved_write_frame）
7、封装文件尾信息（av_write_trailer）
8、关闭操作

5、例子2.转封装流程

转换封装，即将解封装和封装流程组合使用，当然，其中一些时间戳，在重新封装时需要转换一下。
按照流程步骤来完成的转封装代码，当然也缺少应有的判空判错的必要流程，仅作为流程参考使用。

/**
 * 最简单的基于FFmpeg的格式转换(重新封装)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef __cplusplus
extern "C"
{
#endif
#define __STDC_CONSTANT_MACROS
#ifdef _STDINT_H
#undef _STDINT_H
#endif
#include <stdint.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#ifdef __cplusplus
};
#endif

int main(int argc, char **argv){
    if(argc < 3){
        return -1;
    }
    const char* in_file = argv[1];
    const char* out_file = argv[2];
    AVFormatContext* in_ctx=NULL;
    AVFormatContext* out_ctx=NULL;

    av_register_all();
    
    avformat_open_input(&in_ctx,in_file,NULL,NULL);//input
    avformat_find_stream_info(in_ctx,NULL);
    avformat_alloc_output_context2(&out_ctx,NULL,NULL,out_file);//output
    av_dump_format(in_ctx, 0, in_file, 0);
    
    int aidx = -1,vidx = -1;
    int i = 0;
    int out_aidx = -1,out_vidx = -1;

    //get audio stream
    aidx = av_find_best_stream(in_ctx,AVMEDIA_TYPE_AUDIO,-1,-1,NULL,0);
    if(aidx >= 0){
        AVStream *st = avformat_new_stream(out_ctx,NULL);//add stream
        avcodec_parameters_copy(st->codecpar,in_ctx->streams[aidx]->codecpar);
        st->codecpar->codec_tag = 0;
        out_aidx = i;
        i++;
    }
    //get video stream
    vidx = av_find_best_stream(in_ctx,AVMEDIA_TYPE_VIDEO,-1,-1,NULL,0);
    if(vidx >= 0){
        AVStream *st = avformat_new_stream(out_ctx,NULL);//add stream
        avcodec_parameters_copy(st->codecpar,in_ctx->streams[vidx]->codecpar);
        st->codecpar->codec_tag = 0;
        out_vidx = i;
    }
    
    printf("in_aidx[%d] out_aidx[%d]; in_vidx[%d] out_vidx[%d]\n",aidx,out_aidx,vidx,out_vidx);
    av_dump_format(out_ctx, 0, out_file, 1);
    
    avio_open(&out_ctx->pb, out_file, AVIO_FLAG_WRITE);
    
    AVPacket* pkt =av_packet_alloc();
    AVStream *in_stream, *out_stream;
    
    avformat_write_header(out_ctx,NULL);
    while(av_read_frame(in_ctx,pkt) == 0){//DEMUX
        if(pkt->stream_index == aidx){
            in_stream  = in_ctx->streams[aidx];
            out_stream = out_ctx->streams[out_aidx];
        }
        else if(pkt->stream_index == vidx){
            in_stream  = in_ctx->streams[vidx];
            out_stream = out_ctx->streams[out_vidx];
        }else{
            printf("not aidx nor vidx!!!\n");
            av_packet_unref(pkt);
            continue;
        }
        
         //copy packet ???
        pkt->pts = av_rescale_q_rnd(pkt->pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        pkt->dts = av_rescale_q_rnd(pkt->dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
        pkt->duration = av_rescale_q(pkt->duration, in_stream->time_base, out_stream->time_base);
        pkt->pos = -1;

        if(av_interleaved_write_frame(out_ctx, pkt) < 0){
            printf("av_interleaved_write_frame fail\n");
            //break;
        }
        av_packet_unref(pkt);
    }
    av_write_trailer(out_ctx);

    //close
    av_packet_free(&pkt);
    avformat_close_input(&in_ctx);
    avformat_free_context(out_ctx);

    return 0;
}